test_api.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144
  1. import pytest
  2. from pandas import (
  3. DataFrame,
  4. Index,
  5. MultiIndex,
  6. Series,
  7. _testing as tm,
  8. )
  9. from pandas.core.strings.accessor import StringMethods
  10. def test_api(any_string_dtype):
  11. # GH 6106, GH 9322
  12. assert Series.str is StringMethods
  13. assert isinstance(Series([""], dtype=any_string_dtype).str, StringMethods)
  14. def test_api_mi_raises():
  15. # GH 23679
  16. mi = MultiIndex.from_arrays([["a", "b", "c"]])
  17. msg = "Can only use .str accessor with Index, not MultiIndex"
  18. with pytest.raises(AttributeError, match=msg):
  19. mi.str
  20. assert not hasattr(mi, "str")
  21. @pytest.mark.parametrize("dtype", [object, "category"])
  22. def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype):
  23. # one instance of parametrized fixture
  24. box = index_or_series
  25. inferred_dtype, values = any_skipna_inferred_dtype
  26. t = box(values, dtype=dtype) # explicit dtype to avoid casting
  27. types_passing_constructor = [
  28. "string",
  29. "unicode",
  30. "empty",
  31. "bytes",
  32. "mixed",
  33. "mixed-integer",
  34. ]
  35. if inferred_dtype in types_passing_constructor:
  36. # GH 6106
  37. assert isinstance(t.str, StringMethods)
  38. else:
  39. # GH 9184, GH 23011, GH 23163
  40. msg = "Can only use .str accessor with string values.*"
  41. with pytest.raises(AttributeError, match=msg):
  42. t.str
  43. assert not hasattr(t, "str")
  44. @pytest.mark.parametrize("dtype", [object, "category"])
  45. def test_api_per_method(
  46. index_or_series,
  47. dtype,
  48. any_allowed_skipna_inferred_dtype,
  49. any_string_method,
  50. request,
  51. ):
  52. # this test does not check correctness of the different methods,
  53. # just that the methods work on the specified (inferred) dtypes,
  54. # and raise on all others
  55. box = index_or_series
  56. # one instance of each parametrized fixture
  57. inferred_dtype, values = any_allowed_skipna_inferred_dtype
  58. method_name, args, kwargs = any_string_method
  59. reason = None
  60. if box is Index and values.size == 0:
  61. if method_name in ["partition", "rpartition"] and kwargs.get("expand", True):
  62. raises = TypeError
  63. reason = "Method cannot deal with empty Index"
  64. elif method_name == "split" and kwargs.get("expand", None):
  65. raises = TypeError
  66. reason = "Split fails on empty Series when expand=True"
  67. elif method_name == "get_dummies":
  68. raises = ValueError
  69. reason = "Need to fortify get_dummies corner cases"
  70. elif (
  71. box is Index
  72. and inferred_dtype == "empty"
  73. and dtype == object
  74. and method_name == "get_dummies"
  75. ):
  76. raises = ValueError
  77. reason = "Need to fortify get_dummies corner cases"
  78. if reason is not None:
  79. mark = pytest.mark.xfail(raises=raises, reason=reason)
  80. request.node.add_marker(mark)
  81. t = box(values, dtype=dtype) # explicit dtype to avoid casting
  82. method = getattr(t.str, method_name)
  83. bytes_allowed = method_name in ["decode", "get", "len", "slice"]
  84. # as of v0.23.4, all methods except 'cat' are very lenient with the
  85. # allowed data types, just returning NaN for entries that error.
  86. # This could be changed with an 'errors'-kwarg to the `str`-accessor,
  87. # see discussion in GH 13877
  88. mixed_allowed = method_name not in ["cat"]
  89. allowed_types = (
  90. ["string", "unicode", "empty"]
  91. + ["bytes"] * bytes_allowed
  92. + ["mixed", "mixed-integer"] * mixed_allowed
  93. )
  94. if inferred_dtype in allowed_types:
  95. # xref GH 23555, GH 23556
  96. method(*args, **kwargs) # works!
  97. else:
  98. # GH 23011, GH 23163
  99. msg = (
  100. f"Cannot use .str.{method_name} with values of "
  101. f"inferred dtype {repr(inferred_dtype)}."
  102. )
  103. with pytest.raises(TypeError, match=msg):
  104. method(*args, **kwargs)
  105. def test_api_for_categorical(any_string_method, any_string_dtype):
  106. # https://github.com/pandas-dev/pandas/issues/10661
  107. s = Series(list("aabb"), dtype=any_string_dtype)
  108. s = s + " " + s
  109. c = s.astype("category")
  110. assert isinstance(c.str, StringMethods)
  111. method_name, args, kwargs = any_string_method
  112. result = getattr(c.str, method_name)(*args, **kwargs)
  113. expected = getattr(s.astype("object").str, method_name)(*args, **kwargs)
  114. if isinstance(result, DataFrame):
  115. tm.assert_frame_equal(result, expected)
  116. elif isinstance(result, Series):
  117. tm.assert_series_equal(result, expected)
  118. else:
  119. # str.cat(others=None) returns string, for example
  120. assert result == expected