test_replace.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990
  1. import pytest
  2. import pandas as pd
  3. from pandas import Categorical
  4. import pandas._testing as tm
  5. @pytest.mark.parametrize(
  6. "to_replace,value,expected,flip_categories",
  7. [
  8. # one-to-one
  9. (1, 2, [2, 2, 3], False),
  10. (1, 4, [4, 2, 3], False),
  11. (4, 1, [1, 2, 3], False),
  12. (5, 6, [1, 2, 3], False),
  13. # many-to-one
  14. ([1], 2, [2, 2, 3], False),
  15. ([1, 2], 3, [3, 3, 3], False),
  16. ([1, 2], 4, [4, 4, 3], False),
  17. ((1, 2, 4), 5, [5, 5, 3], False),
  18. ((5, 6), 2, [1, 2, 3], False),
  19. ([1], [2], [2, 2, 3], False),
  20. ([1, 4], [5, 2], [5, 2, 3], False),
  21. # GH49404: overlap between to_replace and value
  22. ([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
  23. # GH50872, GH46884: replace with null
  24. (1, None, [None, 2, 3], False),
  25. (1, pd.NA, [None, 2, 3], False),
  26. # check_categorical sorts categories, which crashes on mixed dtypes
  27. (3, "4", [1, 2, "4"], False),
  28. ([1, 2, "3"], "5", ["5", "5", 3], True),
  29. ],
  30. )
  31. def test_replace_categorical_series(to_replace, value, expected, flip_categories):
  32. # GH 31720
  33. ser = pd.Series([1, 2, 3], dtype="category")
  34. result = ser.replace(to_replace, value)
  35. expected = pd.Series(expected, dtype="category")
  36. ser.replace(to_replace, value, inplace=True)
  37. if flip_categories:
  38. expected = expected.cat.set_categories(expected.cat.categories[::-1])
  39. tm.assert_series_equal(expected, result, check_category_order=False)
  40. tm.assert_series_equal(expected, ser, check_category_order=False)
  41. @pytest.mark.parametrize(
  42. "to_replace, value, result, expected_error_msg",
  43. [
  44. ("b", "c", ["a", "c"], "Categorical.categories are different"),
  45. ("c", "d", ["a", "b"], None),
  46. # https://github.com/pandas-dev/pandas/issues/33288
  47. ("a", "a", ["a", "b"], None),
  48. ("b", None, ["a", None], "Categorical.categories length are different"),
  49. ],
  50. )
  51. def test_replace_categorical(to_replace, value, result, expected_error_msg):
  52. # GH#26988
  53. cat = Categorical(["a", "b"])
  54. expected = Categorical(result)
  55. result = pd.Series(cat, copy=False).replace(to_replace, value)._values
  56. tm.assert_categorical_equal(result, expected)
  57. if to_replace == "b": # the "c" test is supposed to be unchanged
  58. with pytest.raises(AssertionError, match=expected_error_msg):
  59. # ensure non-inplace call does not affect original
  60. tm.assert_categorical_equal(cat, expected)
  61. pd.Series(cat, copy=False).replace(to_replace, value, inplace=True)
  62. tm.assert_categorical_equal(cat, expected)
  63. def test_replace_categorical_ea_dtype():
  64. # GH49404
  65. cat = Categorical(pd.array(["a", "b"], dtype="string"))
  66. result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
  67. expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
  68. tm.assert_categorical_equal(result, expected)
  69. def test_replace_maintain_ordering():
  70. # GH51016
  71. dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
  72. ser = pd.Series([0, 1, 2], dtype=dtype)
  73. result = ser.replace(0, 2)
  74. expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
  75. expected = pd.Series([2, 1, 2], dtype=expected_dtype)
  76. tm.assert_series_equal(expected, result, check_category_order=True)