test_string_array.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102
  1. import numpy as np
  2. import pytest
  3. from pandas._libs import lib
  4. from pandas import (
  5. DataFrame,
  6. Series,
  7. _testing as tm,
  8. )
  9. @pytest.mark.filterwarnings("ignore:Falling back")
  10. def test_string_array(nullable_string_dtype, any_string_method):
  11. method_name, args, kwargs = any_string_method
  12. data = ["a", "bb", np.nan, "ccc"]
  13. a = Series(data, dtype=object)
  14. b = Series(data, dtype=nullable_string_dtype)
  15. if method_name == "decode":
  16. with pytest.raises(TypeError, match="a bytes-like object is required"):
  17. getattr(b.str, method_name)(*args, **kwargs)
  18. return
  19. expected = getattr(a.str, method_name)(*args, **kwargs)
  20. result = getattr(b.str, method_name)(*args, **kwargs)
  21. if isinstance(expected, Series):
  22. if expected.dtype == "object" and lib.is_string_array(
  23. expected.dropna().values,
  24. ):
  25. assert result.dtype == nullable_string_dtype
  26. result = result.astype(object)
  27. elif expected.dtype == "object" and lib.is_bool_array(
  28. expected.values, skipna=True
  29. ):
  30. assert result.dtype == "boolean"
  31. result = result.astype(object)
  32. elif expected.dtype == "bool":
  33. assert result.dtype == "boolean"
  34. result = result.astype("bool")
  35. elif expected.dtype == "float" and expected.isna().any():
  36. assert result.dtype == "Int64"
  37. result = result.astype("float")
  38. elif isinstance(expected, DataFrame):
  39. columns = expected.select_dtypes(include="object").columns
  40. assert all(result[columns].dtypes == nullable_string_dtype)
  41. result[columns] = result[columns].astype(object)
  42. tm.assert_equal(result, expected)
  43. @pytest.mark.parametrize(
  44. "method,expected",
  45. [
  46. ("count", [2, None]),
  47. ("find", [0, None]),
  48. ("index", [0, None]),
  49. ("rindex", [2, None]),
  50. ],
  51. )
  52. def test_string_array_numeric_integer_array(nullable_string_dtype, method, expected):
  53. s = Series(["aba", None], dtype=nullable_string_dtype)
  54. result = getattr(s.str, method)("a")
  55. expected = Series(expected, dtype="Int64")
  56. tm.assert_series_equal(result, expected)
  57. @pytest.mark.parametrize(
  58. "method,expected",
  59. [
  60. ("isdigit", [False, None, True]),
  61. ("isalpha", [True, None, False]),
  62. ("isalnum", [True, None, True]),
  63. ("isnumeric", [False, None, True]),
  64. ],
  65. )
  66. def test_string_array_boolean_array(nullable_string_dtype, method, expected):
  67. s = Series(["a", None, "1"], dtype=nullable_string_dtype)
  68. result = getattr(s.str, method)()
  69. expected = Series(expected, dtype="boolean")
  70. tm.assert_series_equal(result, expected)
  71. def test_string_array_extract(nullable_string_dtype):
  72. # https://github.com/pandas-dev/pandas/issues/30969
  73. # Only expand=False & multiple groups was failing
  74. a = Series(["a1", "b2", "cc"], dtype=nullable_string_dtype)
  75. b = Series(["a1", "b2", "cc"], dtype="object")
  76. pat = r"(\w)(\d)"
  77. result = a.str.extract(pat, expand=False)
  78. expected = b.str.extract(pat, expand=False)
  79. assert all(result.dtypes == nullable_string_dtype)
  80. result = result.astype(object)
  81. tm.assert_equal(result, expected)