# test_algos.py
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
  5. @pytest.mark.parametrize("ordered", [True, False])
  6. @pytest.mark.parametrize("categories", [["b", "a", "c"], ["a", "b", "c", "d"]])
  7. def test_factorize(categories, ordered):
  8. cat = pd.Categorical(
  9. ["b", "b", "a", "c", None], categories=categories, ordered=ordered
  10. )
  11. codes, uniques = pd.factorize(cat)
  12. expected_codes = np.array([0, 0, 1, 2, -1], dtype=np.intp)
  13. expected_uniques = pd.Categorical(
  14. ["b", "a", "c"], categories=categories, ordered=ordered
  15. )
  16. tm.assert_numpy_array_equal(codes, expected_codes)
  17. tm.assert_categorical_equal(uniques, expected_uniques)
  18. def test_factorized_sort():
  19. cat = pd.Categorical(["b", "b", None, "a"])
  20. codes, uniques = pd.factorize(cat, sort=True)
  21. expected_codes = np.array([1, 1, -1, 0], dtype=np.intp)
  22. expected_uniques = pd.Categorical(["a", "b"])
  23. tm.assert_numpy_array_equal(codes, expected_codes)
  24. tm.assert_categorical_equal(uniques, expected_uniques)
  25. def test_factorized_sort_ordered():
  26. cat = pd.Categorical(
  27. ["b", "b", None, "a"], categories=["c", "b", "a"], ordered=True
  28. )
  29. codes, uniques = pd.factorize(cat, sort=True)
  30. expected_codes = np.array([0, 0, -1, 1], dtype=np.intp)
  31. expected_uniques = pd.Categorical(
  32. ["b", "a"], categories=["c", "b", "a"], ordered=True
  33. )
  34. tm.assert_numpy_array_equal(codes, expected_codes)
  35. tm.assert_categorical_equal(uniques, expected_uniques)
  36. def test_isin_cats():
  37. # GH2003
  38. cat = pd.Categorical(["a", "b", np.nan])
  39. result = cat.isin(["a", np.nan])
  40. expected = np.array([True, False, True], dtype=bool)
  41. tm.assert_numpy_array_equal(expected, result)
  42. result = cat.isin(["a", "c"])
  43. expected = np.array([True, False, False], dtype=bool)
  44. tm.assert_numpy_array_equal(expected, result)
  45. @pytest.mark.parametrize("empty", [[], pd.Series(dtype=object), np.array([])])
  46. def test_isin_empty(empty):
  47. s = pd.Categorical(["a", "b"])
  48. expected = np.array([False, False], dtype=bool)
  49. result = s.isin(empty)
  50. tm.assert_numpy_array_equal(expected, result)
  51. def test_diff():
  52. ser = pd.Series([1, 2, 3], dtype="category")
  53. msg = "Convert to a suitable dtype"
  54. with pytest.raises(TypeError, match=msg):
  55. ser.diff()
  56. df = ser.to_frame(name="A")
  57. with pytest.raises(TypeError, match=msg):
  58. df.diff()