123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102 |
- import numpy as np
- import pytest
- from pandas._libs import lib
- from pandas import (
- DataFrame,
- Series,
- _testing as tm,
- )
@pytest.mark.filterwarnings("ignore:Falling back")
def test_string_array(nullable_string_dtype, any_string_method):
    """Compare each ``.str`` method on object dtype with the nullable string dtype.

    The same values (including one missing entry) are loaded into an
    object-dtype Series and into a Series of ``nullable_string_dtype``.  The
    method described by ``any_string_method`` -- a ``(name, args, kwargs)``
    triple -- is invoked on both.  The nullable result must carry the
    corresponding nullable dtype and, once cast back to the dtype of the
    object-based result, compare equal to it.
    """
    method_name, args, kwargs = any_string_method

    values = ["a", "bb", np.nan, "ccc"]
    object_ser = Series(values, dtype=object)
    string_ser = Series(values, dtype=nullable_string_dtype)

    def call_str_method(ser):
        # Invoke the string accessor method under test on ``ser``.
        return getattr(ser.str, method_name)(*args, **kwargs)

    if method_name == "decode":
        # On the nullable string Series ``decode`` is expected to raise,
        # complaining that a bytes-like object is required.
        with pytest.raises(TypeError, match="a bytes-like object is required"):
            call_str_method(string_ser)
        return

    expected = call_str_method(object_ser)
    result = call_str_method(string_ser)

    if isinstance(expected, DataFrame):
        # Every object column of the reference frame must be a nullable
        # string column in the result; cast those back before comparing.
        columns = expected.select_dtypes(include="object").columns
        assert all(result[columns].dtypes == nullable_string_dtype)
        result[columns] = result[columns].astype(object)
    elif isinstance(expected, Series):
        if expected.dtype == "object":
            if lib.is_string_array(expected.dropna().values):
                # Strings in, strings out: result keeps the nullable dtype.
                assert result.dtype == nullable_string_dtype
                result = result.astype(object)
            elif lib.is_bool_array(expected.values, skipna=True):
                # Object-dtype booleans with missing values map to "boolean".
                assert result.dtype == "boolean"
                result = result.astype(object)
        elif expected.dtype == "bool":
            assert result.dtype == "boolean"
            result = result.astype("bool")
        elif expected.dtype == "float" and expected.isna().any():
            # Floats holding NaN on the object side map to nullable "Int64".
            assert result.dtype == "Int64"
            result = result.astype("float")

    tm.assert_equal(result, expected)
@pytest.mark.parametrize(
    "method,expected",
    [
        ("count", [2, None]),
        ("find", [0, None]),
        ("index", [0, None]),
        ("rindex", [2, None]),
    ],
)
def test_string_array_numeric_integer_array(nullable_string_dtype, method, expected):
    """Integer-valued ``.str`` methods on a nullable string Series give ``Int64``.

    Each parametrized ``method`` is called with the argument ``"a"`` on a
    two-element Series (``"aba"`` and a missing value); the outcome must equal
    ``expected`` stored as an ``Int64`` Series.
    """
    ser = Series(["aba", None], dtype=nullable_string_dtype)
    str_method = getattr(ser.str, method)
    result = str_method("a")

    tm.assert_series_equal(result, Series(expected, dtype="Int64"))
@pytest.mark.parametrize(
    "method,expected",
    [
        ("isdigit", [False, None, True]),
        ("isalpha", [True, None, False]),
        ("isalnum", [True, None, True]),
        ("isnumeric", [False, None, True]),
    ],
)
def test_string_array_boolean_array(nullable_string_dtype, method, expected):
    """Predicate ``.str`` methods on a nullable string Series give ``boolean``.

    Each parametrized ``method`` is called without arguments on a Series
    holding ``"a"``, a missing value and ``"1"``; the outcome must equal
    ``expected`` stored as a ``boolean`` Series.
    """
    ser = Series(["a", None, "1"], dtype=nullable_string_dtype)
    predicate = getattr(ser.str, method)
    result = predicate()

    tm.assert_series_equal(result, Series(expected, dtype="boolean"))
def test_string_array_extract(nullable_string_dtype):
    """``str.extract`` with several groups and ``expand=False`` matches object dtype.

    The resulting columns must all have ``nullable_string_dtype`` and, after
    casting to object, equal what the object-dtype Series produces.
    """
    # https://github.com/pandas-dev/pandas/issues/30969
    # Only expand=False & multiple groups was failing
    values = ["a1", "b2", "cc"]
    string_ser = Series(values, dtype=nullable_string_dtype)
    object_ser = Series(values, dtype="object")
    pattern = r"(\w)(\d)"

    result = string_ser.str.extract(pattern, expand=False)
    expected = object_ser.str.extract(pattern, expand=False)

    has_string_dtype = result.dtypes == nullable_string_dtype
    assert all(has_string_dtype)

    # Cast back to object so the frames can be compared directly.
    tm.assert_equal(result.astype(object), expected)
|