test_operators.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. import warnings
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import (
  6. Categorical,
  7. DataFrame,
  8. Series,
  9. date_range,
  10. )
  11. import pandas._testing as tm
  12. class TestCategoricalOpsWithFactor:
  13. def test_categories_none_comparisons(self):
  14. factor = Categorical(["a", "b", "b", "a", "a", "c", "c", "c"], ordered=True)
  15. tm.assert_categorical_equal(factor, factor)
  16. def test_comparisons(self, factor):
  17. result = factor[factor == "a"]
  18. expected = factor[np.asarray(factor) == "a"]
  19. tm.assert_categorical_equal(result, expected)
  20. result = factor[factor != "a"]
  21. expected = factor[np.asarray(factor) != "a"]
  22. tm.assert_categorical_equal(result, expected)
  23. result = factor[factor < "c"]
  24. expected = factor[np.asarray(factor) < "c"]
  25. tm.assert_categorical_equal(result, expected)
  26. result = factor[factor > "a"]
  27. expected = factor[np.asarray(factor) > "a"]
  28. tm.assert_categorical_equal(result, expected)
  29. result = factor[factor >= "b"]
  30. expected = factor[np.asarray(factor) >= "b"]
  31. tm.assert_categorical_equal(result, expected)
  32. result = factor[factor <= "b"]
  33. expected = factor[np.asarray(factor) <= "b"]
  34. tm.assert_categorical_equal(result, expected)
  35. n = len(factor)
  36. other = factor[np.random.permutation(n)]
  37. result = factor == other
  38. expected = np.asarray(factor) == np.asarray(other)
  39. tm.assert_numpy_array_equal(result, expected)
  40. result = factor == "d"
  41. expected = np.zeros(len(factor), dtype=bool)
  42. tm.assert_numpy_array_equal(result, expected)
  43. # comparisons with categoricals
  44. cat_rev = Categorical(["a", "b", "c"], categories=["c", "b", "a"], ordered=True)
  45. cat_rev_base = Categorical(
  46. ["b", "b", "b"], categories=["c", "b", "a"], ordered=True
  47. )
  48. cat = Categorical(["a", "b", "c"], ordered=True)
  49. cat_base = Categorical(["b", "b", "b"], categories=cat.categories, ordered=True)
  50. # comparisons need to take categories ordering into account
  51. res_rev = cat_rev > cat_rev_base
  52. exp_rev = np.array([True, False, False])
  53. tm.assert_numpy_array_equal(res_rev, exp_rev)
  54. res_rev = cat_rev < cat_rev_base
  55. exp_rev = np.array([False, False, True])
  56. tm.assert_numpy_array_equal(res_rev, exp_rev)
  57. res = cat > cat_base
  58. exp = np.array([False, False, True])
  59. tm.assert_numpy_array_equal(res, exp)
  60. # Only categories with same categories can be compared
  61. msg = "Categoricals can only be compared if 'categories' are the same"
  62. with pytest.raises(TypeError, match=msg):
  63. cat > cat_rev
  64. cat_rev_base2 = Categorical(["b", "b", "b"], categories=["c", "b", "a", "d"])
  65. with pytest.raises(TypeError, match=msg):
  66. cat_rev > cat_rev_base2
  67. # Only categories with same ordering information can be compared
  68. cat_unorderd = cat.set_ordered(False)
  69. assert not (cat > cat).any()
  70. with pytest.raises(TypeError, match=msg):
  71. cat > cat_unorderd
  72. # comparison (in both directions) with Series will raise
  73. s = Series(["b", "b", "b"])
  74. msg = (
  75. "Cannot compare a Categorical for op __gt__ with type "
  76. r"<class 'numpy\.ndarray'>"
  77. )
  78. with pytest.raises(TypeError, match=msg):
  79. cat > s
  80. with pytest.raises(TypeError, match=msg):
  81. cat_rev > s
  82. with pytest.raises(TypeError, match=msg):
  83. s < cat
  84. with pytest.raises(TypeError, match=msg):
  85. s < cat_rev
  86. # comparison with numpy.array will raise in both direction, but only on
  87. # newer numpy versions
  88. a = np.array(["b", "b", "b"])
  89. with pytest.raises(TypeError, match=msg):
  90. cat > a
  91. with pytest.raises(TypeError, match=msg):
  92. cat_rev > a
  93. # Make sure that unequal comparison take the categories order in
  94. # account
  95. cat_rev = Categorical(list("abc"), categories=list("cba"), ordered=True)
  96. exp = np.array([True, False, False])
  97. res = cat_rev > "b"
  98. tm.assert_numpy_array_equal(res, exp)
  99. # check that zero-dim array gets unboxed
  100. res = cat_rev > np.array("b")
  101. tm.assert_numpy_array_equal(res, exp)
  102. class TestCategoricalOps:
  103. def test_compare_frame(self):
  104. # GH#24282 check that Categorical.__cmp__(DataFrame) defers to frame
  105. data = ["a", "b", 2, "a"]
  106. cat = Categorical(data)
  107. df = DataFrame(cat)
  108. result = cat == df.T
  109. expected = DataFrame([[True, True, True, True]])
  110. tm.assert_frame_equal(result, expected)
  111. result = cat[::-1] != df.T
  112. expected = DataFrame([[False, True, True, False]])
  113. tm.assert_frame_equal(result, expected)
  114. def test_compare_frame_raises(self, comparison_op):
  115. # alignment raises unless we transpose
  116. op = comparison_op
  117. cat = Categorical(["a", "b", 2, "a"])
  118. df = DataFrame(cat)
  119. msg = "Unable to coerce to Series, length must be 1: given 4"
  120. with pytest.raises(ValueError, match=msg):
  121. op(cat, df)
  122. def test_datetime_categorical_comparison(self):
  123. dt_cat = Categorical(date_range("2014-01-01", periods=3), ordered=True)
  124. tm.assert_numpy_array_equal(dt_cat > dt_cat[0], np.array([False, True, True]))
  125. tm.assert_numpy_array_equal(dt_cat[0] < dt_cat, np.array([False, True, True]))
  126. def test_reflected_comparison_with_scalars(self):
  127. # GH8658
  128. cat = Categorical([1, 2, 3], ordered=True)
  129. tm.assert_numpy_array_equal(cat > cat[0], np.array([False, True, True]))
  130. tm.assert_numpy_array_equal(cat[0] < cat, np.array([False, True, True]))
  131. def test_comparison_with_unknown_scalars(self):
  132. # https://github.com/pandas-dev/pandas/issues/9836#issuecomment-92123057
  133. # and following comparisons with scalars not in categories should raise
  134. # for unequal comps, but not for equal/not equal
  135. cat = Categorical([1, 2, 3], ordered=True)
  136. msg = "Invalid comparison between dtype=category and int"
  137. with pytest.raises(TypeError, match=msg):
  138. cat < 4
  139. with pytest.raises(TypeError, match=msg):
  140. cat > 4
  141. with pytest.raises(TypeError, match=msg):
  142. 4 < cat
  143. with pytest.raises(TypeError, match=msg):
  144. 4 > cat
  145. tm.assert_numpy_array_equal(cat == 4, np.array([False, False, False]))
  146. tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True]))
  147. def test_comparison_with_tuple(self):
  148. cat = Categorical(np.array(["foo", (0, 1), 3, (0, 1)], dtype=object))
  149. result = cat == "foo"
  150. expected = np.array([True, False, False, False], dtype=bool)
  151. tm.assert_numpy_array_equal(result, expected)
  152. result = cat == (0, 1)
  153. expected = np.array([False, True, False, True], dtype=bool)
  154. tm.assert_numpy_array_equal(result, expected)
  155. result = cat != (0, 1)
  156. tm.assert_numpy_array_equal(result, ~expected)
  157. def test_comparison_of_ordered_categorical_with_nan_to_scalar(
  158. self, compare_operators_no_eq_ne
  159. ):
  160. # https://github.com/pandas-dev/pandas/issues/26504
  161. # BUG: fix ordered categorical comparison with missing values (#26504 )
  162. # and following comparisons with scalars in categories with missing
  163. # values should be evaluated as False
  164. cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
  165. scalar = 2
  166. with warnings.catch_warnings():
  167. warnings.simplefilter("ignore", RuntimeWarning)
  168. expected = getattr(np.array(cat), compare_operators_no_eq_ne)(scalar)
  169. actual = getattr(cat, compare_operators_no_eq_ne)(scalar)
  170. tm.assert_numpy_array_equal(actual, expected)
  171. def test_comparison_of_ordered_categorical_with_nan_to_listlike(
  172. self, compare_operators_no_eq_ne
  173. ):
  174. # https://github.com/pandas-dev/pandas/issues/26504
  175. # and following comparisons of missing values in ordered Categorical
  176. # with listlike should be evaluated as False
  177. cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True)
  178. other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True)
  179. with warnings.catch_warnings():
  180. warnings.simplefilter("ignore", RuntimeWarning)
  181. expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2)
  182. actual = getattr(cat, compare_operators_no_eq_ne)(other)
  183. tm.assert_numpy_array_equal(actual, expected)
  184. @pytest.mark.parametrize(
  185. "data,reverse,base",
  186. [(list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])],
  187. )
  188. def test_comparisons(self, data, reverse, base):
  189. cat_rev = Series(Categorical(data, categories=reverse, ordered=True))
  190. cat_rev_base = Series(Categorical(base, categories=reverse, ordered=True))
  191. cat = Series(Categorical(data, ordered=True))
  192. cat_base = Series(
  193. Categorical(base, categories=cat.cat.categories, ordered=True)
  194. )
  195. s = Series(base)
  196. a = np.array(base)
  197. # comparisons need to take categories ordering into account
  198. res_rev = cat_rev > cat_rev_base
  199. exp_rev = Series([True, False, False])
  200. tm.assert_series_equal(res_rev, exp_rev)
  201. res_rev = cat_rev < cat_rev_base
  202. exp_rev = Series([False, False, True])
  203. tm.assert_series_equal(res_rev, exp_rev)
  204. res = cat > cat_base
  205. exp = Series([False, False, True])
  206. tm.assert_series_equal(res, exp)
  207. scalar = base[1]
  208. res = cat > scalar
  209. exp = Series([False, False, True])
  210. exp2 = cat.values > scalar
  211. tm.assert_series_equal(res, exp)
  212. tm.assert_numpy_array_equal(res.values, exp2)
  213. res_rev = cat_rev > scalar
  214. exp_rev = Series([True, False, False])
  215. exp_rev2 = cat_rev.values > scalar
  216. tm.assert_series_equal(res_rev, exp_rev)
  217. tm.assert_numpy_array_equal(res_rev.values, exp_rev2)
  218. # Only categories with same categories can be compared
  219. msg = "Categoricals can only be compared if 'categories' are the same"
  220. with pytest.raises(TypeError, match=msg):
  221. cat > cat_rev
  222. # categorical cannot be compared to Series or numpy array, and also
  223. # not the other way around
  224. msg = (
  225. "Cannot compare a Categorical for op __gt__ with type "
  226. r"<class 'numpy\.ndarray'>"
  227. )
  228. with pytest.raises(TypeError, match=msg):
  229. cat > s
  230. with pytest.raises(TypeError, match=msg):
  231. cat_rev > s
  232. with pytest.raises(TypeError, match=msg):
  233. cat > a
  234. with pytest.raises(TypeError, match=msg):
  235. cat_rev > a
  236. with pytest.raises(TypeError, match=msg):
  237. s < cat
  238. with pytest.raises(TypeError, match=msg):
  239. s < cat_rev
  240. with pytest.raises(TypeError, match=msg):
  241. a < cat
  242. with pytest.raises(TypeError, match=msg):
  243. a < cat_rev
  244. @pytest.mark.parametrize(
  245. "ctor",
  246. [
  247. lambda *args, **kwargs: Categorical(*args, **kwargs),
  248. lambda *args, **kwargs: Series(Categorical(*args, **kwargs)),
  249. ],
  250. )
  251. def test_unordered_different_order_equal(self, ctor):
  252. # https://github.com/pandas-dev/pandas/issues/16014
  253. c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
  254. c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
  255. assert (c1 == c2).all()
  256. c1 = ctor(["a", "b"], categories=["a", "b"], ordered=False)
  257. c2 = ctor(["b", "a"], categories=["b", "a"], ordered=False)
  258. assert (c1 != c2).all()
  259. c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
  260. c2 = ctor(["b", "b"], categories=["b", "a"], ordered=False)
  261. assert (c1 != c2).all()
  262. c1 = ctor(["a", "a"], categories=["a", "b"], ordered=False)
  263. c2 = ctor(["a", "b"], categories=["b", "a"], ordered=False)
  264. result = c1 == c2
  265. tm.assert_numpy_array_equal(np.array(result), np.array([True, False]))
  266. def test_unordered_different_categories_raises(self):
  267. c1 = Categorical(["a", "b"], categories=["a", "b"], ordered=False)
  268. c2 = Categorical(["a", "c"], categories=["c", "a"], ordered=False)
  269. with pytest.raises(TypeError, match=("Categoricals can only be compared")):
  270. c1 == c2
  271. def test_compare_different_lengths(self):
  272. c1 = Categorical([], categories=["a", "b"])
  273. c2 = Categorical([], categories=["a"])
  274. msg = "Categoricals can only be compared if 'categories' are the same."
  275. with pytest.raises(TypeError, match=msg):
  276. c1 == c2
  277. def test_compare_unordered_different_order(self):
  278. # https://github.com/pandas-dev/pandas/issues/16603#issuecomment-
  279. # 349290078
  280. a = Categorical(["a"], categories=["a", "b"])
  281. b = Categorical(["b"], categories=["b", "a"])
  282. assert not a.equals(b)
  283. def test_numeric_like_ops(self):
  284. df = DataFrame({"value": np.random.randint(0, 10000, 100)})
  285. labels = [f"{i} - {i + 499}" for i in range(0, 10000, 500)]
  286. cat_labels = Categorical(labels, labels)
  287. df = df.sort_values(by=["value"], ascending=True)
  288. df["value_group"] = pd.cut(
  289. df.value, range(0, 10500, 500), right=False, labels=cat_labels
  290. )
  291. # numeric ops should not succeed
  292. for op, str_rep in [
  293. ("__add__", r"\+"),
  294. ("__sub__", "-"),
  295. ("__mul__", r"\*"),
  296. ("__truediv__", "/"),
  297. ]:
  298. msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
  299. with pytest.raises(TypeError, match=msg):
  300. getattr(df, op)(df)
  301. # reduction ops should not succeed (unless specifically defined, e.g.
  302. # min/max)
  303. s = df["value_group"]
  304. for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]:
  305. msg = f"does not support reduction '{op}'"
  306. with pytest.raises(TypeError, match=msg):
  307. getattr(s, op)(numeric_only=False)
  308. def test_numeric_like_ops_series(self):
  309. # numpy ops
  310. s = Series(Categorical([1, 2, 3, 4]))
  311. with pytest.raises(TypeError, match="does not support reduction 'sum'"):
  312. np.sum(s)
  313. @pytest.mark.parametrize(
  314. "op, str_rep",
  315. [
  316. ("__add__", r"\+"),
  317. ("__sub__", "-"),
  318. ("__mul__", r"\*"),
  319. ("__truediv__", "/"),
  320. ],
  321. )
  322. def test_numeric_like_ops_series_arith(self, op, str_rep):
  323. # numeric ops on a Series
  324. s = Series(Categorical([1, 2, 3, 4]))
  325. msg = f"Series cannot perform the operation {str_rep}|unsupported operand"
  326. with pytest.raises(TypeError, match=msg):
  327. getattr(s, op)(2)
  328. def test_numeric_like_ops_series_invalid(self):
  329. # invalid ufunc
  330. s = Series(Categorical([1, 2, 3, 4]))
  331. msg = "Object with dtype category cannot perform the numpy op log"
  332. with pytest.raises(TypeError, match=msg):
  333. np.log(s)