# Source listing: test_object.py (12 KB)
# Arithmetic tests for DataFrame/Series/Index/Array classes that should
# behave identically.
# Specifically for object dtype
  4. import datetime
  5. from decimal import Decimal
  6. import operator
  7. import numpy as np
  8. import pytest
  9. import pandas as pd
  10. from pandas import (
  11. Series,
  12. Timestamp,
  13. )
  14. import pandas._testing as tm
  15. from pandas.core import ops
# ------------------------------------------------------------------
# Comparisons
  18. class TestObjectComparisons:
  19. def test_comparison_object_numeric_nas(self, comparison_op):
  20. ser = Series(np.random.randn(10), dtype=object)
  21. shifted = ser.shift(2)
  22. func = comparison_op
  23. result = func(ser, shifted)
  24. expected = func(ser.astype(float), shifted.astype(float))
  25. tm.assert_series_equal(result, expected)
  26. def test_object_comparisons(self):
  27. ser = Series(["a", "b", np.nan, "c", "a"])
  28. result = ser == "a"
  29. expected = Series([True, False, False, False, True])
  30. tm.assert_series_equal(result, expected)
  31. result = ser < "a"
  32. expected = Series([False, False, False, False, False])
  33. tm.assert_series_equal(result, expected)
  34. result = ser != "a"
  35. expected = -(ser == "a")
  36. tm.assert_series_equal(result, expected)
  37. @pytest.mark.parametrize("dtype", [None, object])
  38. def test_more_na_comparisons(self, dtype):
  39. left = Series(["a", np.nan, "c"], dtype=dtype)
  40. right = Series(["a", np.nan, "d"], dtype=dtype)
  41. result = left == right
  42. expected = Series([True, False, False])
  43. tm.assert_series_equal(result, expected)
  44. result = left != right
  45. expected = Series([False, True, True])
  46. tm.assert_series_equal(result, expected)
  47. result = left == np.nan
  48. expected = Series([False, False, False])
  49. tm.assert_series_equal(result, expected)
  50. result = left != np.nan
  51. expected = Series([True, True, True])
  52. tm.assert_series_equal(result, expected)
# ------------------------------------------------------------------
# Arithmetic
  55. class TestArithmetic:
  56. def test_add_period_to_array_of_offset(self):
  57. # GH#50162
  58. per = pd.Period("2012-1-1", freq="D")
  59. pi = pd.period_range("2012-1-1", periods=10, freq="D")
  60. idx = per - pi
  61. expected = pd.Index([x + per for x in idx], dtype=object)
  62. result = idx + per
  63. tm.assert_index_equal(result, expected)
  64. result = per + idx
  65. tm.assert_index_equal(result, expected)
  66. # TODO: parametrize
  67. def test_pow_ops_object(self):
  68. # GH#22922
  69. # pow is weird with masking & 1, so testing here
  70. a = Series([1, np.nan, 1, np.nan], dtype=object)
  71. b = Series([1, np.nan, np.nan, 1], dtype=object)
  72. result = a**b
  73. expected = Series(a.values**b.values, dtype=object)
  74. tm.assert_series_equal(result, expected)
  75. result = b**a
  76. expected = Series(b.values**a.values, dtype=object)
  77. tm.assert_series_equal(result, expected)
  78. @pytest.mark.parametrize("op", [operator.add, ops.radd])
  79. @pytest.mark.parametrize("other", ["category", "Int64"])
  80. def test_add_extension_scalar(self, other, box_with_array, op):
  81. # GH#22378
  82. # Check that scalars satisfying is_extension_array_dtype(obj)
  83. # do not incorrectly try to dispatch to an ExtensionArray operation
  84. arr = Series(["a", "b", "c"])
  85. expected = Series([op(x, other) for x in arr])
  86. arr = tm.box_expected(arr, box_with_array)
  87. expected = tm.box_expected(expected, box_with_array)
  88. result = op(arr, other)
  89. tm.assert_equal(result, expected)
  90. def test_objarr_add_str(self, box_with_array):
  91. ser = Series(["x", np.nan, "x"])
  92. expected = Series(["xa", np.nan, "xa"])
  93. ser = tm.box_expected(ser, box_with_array)
  94. expected = tm.box_expected(expected, box_with_array)
  95. result = ser + "a"
  96. tm.assert_equal(result, expected)
  97. def test_objarr_radd_str(self, box_with_array):
  98. ser = Series(["x", np.nan, "x"])
  99. expected = Series(["ax", np.nan, "ax"])
  100. ser = tm.box_expected(ser, box_with_array)
  101. expected = tm.box_expected(expected, box_with_array)
  102. result = "a" + ser
  103. tm.assert_equal(result, expected)
  104. @pytest.mark.parametrize(
  105. "data",
  106. [
  107. [1, 2, 3],
  108. [1.1, 2.2, 3.3],
  109. [Timestamp("2011-01-01"), Timestamp("2011-01-02"), pd.NaT],
  110. ["x", "y", 1],
  111. ],
  112. )
  113. @pytest.mark.parametrize("dtype", [None, object])
  114. def test_objarr_radd_str_invalid(self, dtype, data, box_with_array):
  115. ser = Series(data, dtype=dtype)
  116. ser = tm.box_expected(ser, box_with_array)
  117. msg = "|".join(
  118. [
  119. "can only concatenate str",
  120. "did not contain a loop with signature matching types",
  121. "unsupported operand type",
  122. "must be str",
  123. ]
  124. )
  125. with pytest.raises(TypeError, match=msg):
  126. "foo_" + ser
  127. @pytest.mark.parametrize("op", [operator.add, ops.radd, operator.sub, ops.rsub])
  128. def test_objarr_add_invalid(self, op, box_with_array):
  129. # invalid ops
  130. box = box_with_array
  131. obj_ser = tm.makeObjectSeries()
  132. obj_ser.name = "objects"
  133. obj_ser = tm.box_expected(obj_ser, box)
  134. msg = "|".join(
  135. ["can only concatenate str", "unsupported operand type", "must be str"]
  136. )
  137. with pytest.raises(Exception, match=msg):
  138. op(obj_ser, 1)
  139. with pytest.raises(Exception, match=msg):
  140. op(obj_ser, np.array(1, dtype=np.int64))
  141. # TODO: Moved from tests.series.test_operators; needs cleanup
  142. def test_operators_na_handling(self):
  143. ser = Series(["foo", "bar", "baz", np.nan])
  144. result = "prefix_" + ser
  145. expected = Series(["prefix_foo", "prefix_bar", "prefix_baz", np.nan])
  146. tm.assert_series_equal(result, expected)
  147. result = ser + "_suffix"
  148. expected = Series(["foo_suffix", "bar_suffix", "baz_suffix", np.nan])
  149. tm.assert_series_equal(result, expected)
  150. # TODO: parametrize over box
  151. @pytest.mark.parametrize("dtype", [None, object])
  152. def test_series_with_dtype_radd_timedelta(self, dtype):
  153. # note this test is _not_ aimed at timedelta64-dtyped Series
  154. # as of 2.0 we retain object dtype when ser.dtype == object
  155. ser = Series(
  156. [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
  157. dtype=dtype,
  158. )
  159. expected = Series(
  160. [pd.Timedelta("4 days"), pd.Timedelta("5 days"), pd.Timedelta("6 days")],
  161. dtype=dtype,
  162. )
  163. result = pd.Timedelta("3 days") + ser
  164. tm.assert_series_equal(result, expected)
  165. result = ser + pd.Timedelta("3 days")
  166. tm.assert_series_equal(result, expected)
  167. # TODO: cleanup & parametrize over box
  168. def test_mixed_timezone_series_ops_object(self):
  169. # GH#13043
  170. ser = Series(
  171. [
  172. Timestamp("2015-01-01", tz="US/Eastern"),
  173. Timestamp("2015-01-01", tz="Asia/Tokyo"),
  174. ],
  175. name="xxx",
  176. )
  177. assert ser.dtype == object
  178. exp = Series(
  179. [
  180. Timestamp("2015-01-02", tz="US/Eastern"),
  181. Timestamp("2015-01-02", tz="Asia/Tokyo"),
  182. ],
  183. name="xxx",
  184. )
  185. tm.assert_series_equal(ser + pd.Timedelta("1 days"), exp)
  186. tm.assert_series_equal(pd.Timedelta("1 days") + ser, exp)
  187. # object series & object series
  188. ser2 = Series(
  189. [
  190. Timestamp("2015-01-03", tz="US/Eastern"),
  191. Timestamp("2015-01-05", tz="Asia/Tokyo"),
  192. ],
  193. name="xxx",
  194. )
  195. assert ser2.dtype == object
  196. exp = Series(
  197. [pd.Timedelta("2 days"), pd.Timedelta("4 days")], name="xxx", dtype=object
  198. )
  199. tm.assert_series_equal(ser2 - ser, exp)
  200. tm.assert_series_equal(ser - ser2, -exp)
  201. ser = Series(
  202. [pd.Timedelta("01:00:00"), pd.Timedelta("02:00:00")],
  203. name="xxx",
  204. dtype=object,
  205. )
  206. assert ser.dtype == object
  207. exp = Series(
  208. [pd.Timedelta("01:30:00"), pd.Timedelta("02:30:00")],
  209. name="xxx",
  210. dtype=object,
  211. )
  212. tm.assert_series_equal(ser + pd.Timedelta("00:30:00"), exp)
  213. tm.assert_series_equal(pd.Timedelta("00:30:00") + ser, exp)
  214. # TODO: cleanup & parametrize over box
  215. def test_iadd_preserves_name(self):
  216. # GH#17067, GH#19723 __iadd__ and __isub__ should preserve index name
  217. ser = Series([1, 2, 3])
  218. ser.index.name = "foo"
  219. ser.index += 1
  220. assert ser.index.name == "foo"
  221. ser.index -= 1
  222. assert ser.index.name == "foo"
  223. def test_add_string(self):
  224. # from bug report
  225. index = pd.Index(["a", "b", "c"])
  226. index2 = index + "foo"
  227. assert "a" not in index2
  228. assert "afoo" in index2
  229. def test_iadd_string(self):
  230. index = pd.Index(["a", "b", "c"])
  231. # doesn't fail test unless there is a check before `+=`
  232. assert "a" in index
  233. index += "_x"
  234. assert "a_x" in index
  235. def test_add(self):
  236. index = tm.makeStringIndex(100)
  237. expected = pd.Index(index.values * 2)
  238. tm.assert_index_equal(index + index, expected)
  239. tm.assert_index_equal(index + index.tolist(), expected)
  240. tm.assert_index_equal(index.tolist() + index, expected)
  241. # test add and radd
  242. index = pd.Index(list("abc"))
  243. expected = pd.Index(["a1", "b1", "c1"])
  244. tm.assert_index_equal(index + "1", expected)
  245. expected = pd.Index(["1a", "1b", "1c"])
  246. tm.assert_index_equal("1" + index, expected)
  247. def test_sub_fail(self):
  248. index = tm.makeStringIndex(100)
  249. msg = "unsupported operand type|Cannot broadcast"
  250. with pytest.raises(TypeError, match=msg):
  251. index - "a"
  252. with pytest.raises(TypeError, match=msg):
  253. index - index
  254. with pytest.raises(TypeError, match=msg):
  255. index - index.tolist()
  256. with pytest.raises(TypeError, match=msg):
  257. index.tolist() - index
  258. def test_sub_object(self):
  259. # GH#19369
  260. index = pd.Index([Decimal(1), Decimal(2)])
  261. expected = pd.Index([Decimal(0), Decimal(1)])
  262. result = index - Decimal(1)
  263. tm.assert_index_equal(result, expected)
  264. result = index - pd.Index([Decimal(1), Decimal(1)])
  265. tm.assert_index_equal(result, expected)
  266. msg = "unsupported operand type"
  267. with pytest.raises(TypeError, match=msg):
  268. index - "foo"
  269. with pytest.raises(TypeError, match=msg):
  270. index - np.array([2, "foo"], dtype=object)
  271. def test_rsub_object(self, fixed_now_ts):
  272. # GH#19369
  273. index = pd.Index([Decimal(1), Decimal(2)])
  274. expected = pd.Index([Decimal(1), Decimal(0)])
  275. result = Decimal(2) - index
  276. tm.assert_index_equal(result, expected)
  277. result = np.array([Decimal(2), Decimal(2)]) - index
  278. tm.assert_index_equal(result, expected)
  279. msg = "unsupported operand type"
  280. with pytest.raises(TypeError, match=msg):
  281. "foo" - index
  282. with pytest.raises(TypeError, match=msg):
  283. np.array([True, fixed_now_ts]) - index
  284. class MyIndex(pd.Index):
  285. # Simple index subclass that tracks ops calls.
  286. _calls: int
  287. @classmethod
  288. def _simple_new(cls, values, name=None, dtype=None):
  289. result = object.__new__(cls)
  290. result._data = values
  291. result._name = name
  292. result._calls = 0
  293. result._reset_identity()
  294. return result
  295. def __add__(self, other):
  296. self._calls += 1
  297. return self._simple_new(self._data)
  298. def __radd__(self, other):
  299. return self.__add__(other)
  300. @pytest.mark.parametrize(
  301. "other",
  302. [
  303. [datetime.timedelta(1), datetime.timedelta(2)],
  304. [datetime.datetime(2000, 1, 1), datetime.datetime(2000, 1, 2)],
  305. [pd.Period("2000"), pd.Period("2001")],
  306. ["a", "b"],
  307. ],
  308. ids=["timedelta", "datetime", "period", "object"],
  309. )
  310. def test_index_ops_defer_to_unknown_subclasses(other):
  311. # https://github.com/pandas-dev/pandas/issues/31109
  312. values = np.array(
  313. [datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)], dtype=object
  314. )
  315. a = MyIndex._simple_new(values)
  316. other = pd.Index(other)
  317. result = other + a
  318. assert isinstance(result, MyIndex)
  319. assert a._calls == 1