# test_arithmetic.py (11 KB)

import operator

import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm
from pandas.core import ops
from pandas.core.arrays import FloatingArray

# Basic test for the arithmetic array ops
# -----------------------------------------------------------------------------

  10. @pytest.mark.parametrize(
  11. "opname, exp",
  12. [("add", [1, 3, None, None, 9]), ("mul", [0, 2, None, None, 20])],
  13. ids=["add", "mul"],
  14. )
  15. def test_add_mul(dtype, opname, exp):
  16. a = pd.array([0, 1, None, 3, 4], dtype=dtype)
  17. b = pd.array([1, 2, 3, None, 5], dtype=dtype)
  18. # array / array
  19. expected = pd.array(exp, dtype=dtype)
  20. op = getattr(operator, opname)
  21. result = op(a, b)
  22. tm.assert_extension_array_equal(result, expected)
  23. op = getattr(ops, "r" + opname)
  24. result = op(a, b)
  25. tm.assert_extension_array_equal(result, expected)
  26. def test_sub(dtype):
  27. a = pd.array([1, 2, 3, None, 5], dtype=dtype)
  28. b = pd.array([0, 1, None, 3, 4], dtype=dtype)
  29. result = a - b
  30. expected = pd.array([1, 1, None, None, 1], dtype=dtype)
  31. tm.assert_extension_array_equal(result, expected)
  32. def test_div(dtype):
  33. a = pd.array([1, 2, 3, None, 5], dtype=dtype)
  34. b = pd.array([0, 1, None, 3, 4], dtype=dtype)
  35. result = a / b
  36. expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")
  37. tm.assert_extension_array_equal(result, expected)
  38. @pytest.mark.parametrize("zero, negative", [(0, False), (0.0, False), (-0.0, True)])
  39. def test_divide_by_zero(zero, negative):
  40. # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
  41. a = pd.array([0, 1, -1, None], dtype="Int64")
  42. result = a / zero
  43. expected = FloatingArray(
  44. np.array([np.nan, np.inf, -np.inf, 1], dtype="float64"),
  45. np.array([False, False, False, True]),
  46. )
  47. if negative:
  48. expected *= -1
  49. tm.assert_extension_array_equal(result, expected)
  50. def test_floordiv(dtype):
  51. a = pd.array([1, 2, 3, None, 5], dtype=dtype)
  52. b = pd.array([0, 1, None, 3, 4], dtype=dtype)
  53. result = a // b
  54. # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
  55. expected = pd.array([0, 2, None, None, 1], dtype=dtype)
  56. tm.assert_extension_array_equal(result, expected)
  57. def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
  58. # GH 48223: Aligns with non-masked floordiv
  59. # but differs from numpy
  60. # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
  61. ser = pd.Series([0, 1], dtype=any_int_ea_dtype)
  62. result = 1 // ser
  63. expected = pd.Series([np.inf, 1.0], dtype="Float64")
  64. tm.assert_series_equal(result, expected)
  65. ser_non_nullable = ser.astype(ser.dtype.numpy_dtype)
  66. result = 1 // ser_non_nullable
  67. expected = expected.astype(np.float64)
  68. tm.assert_series_equal(result, expected)
  69. def test_mod(dtype):
  70. a = pd.array([1, 2, 3, None, 5], dtype=dtype)
  71. b = pd.array([0, 1, None, 3, 4], dtype=dtype)
  72. result = a % b
  73. expected = pd.array([0, 0, None, None, 1], dtype=dtype)
  74. tm.assert_extension_array_equal(result, expected)
  75. def test_pow_scalar():
  76. a = pd.array([-1, 0, 1, None, 2], dtype="Int64")
  77. result = a**0
  78. expected = pd.array([1, 1, 1, 1, 1], dtype="Int64")
  79. tm.assert_extension_array_equal(result, expected)
  80. result = a**1
  81. expected = pd.array([-1, 0, 1, None, 2], dtype="Int64")
  82. tm.assert_extension_array_equal(result, expected)
  83. result = a**pd.NA
  84. expected = pd.array([None, None, 1, None, None], dtype="Int64")
  85. tm.assert_extension_array_equal(result, expected)
  86. result = a**np.nan
  87. expected = FloatingArray(
  88. np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64"),
  89. np.array([False, False, False, True, False]),
  90. )
  91. tm.assert_extension_array_equal(result, expected)
  92. # reversed
  93. a = a[1:] # Can't raise integers to negative powers.
  94. result = 0**a
  95. expected = pd.array([1, 0, None, 0], dtype="Int64")
  96. tm.assert_extension_array_equal(result, expected)
  97. result = 1**a
  98. expected = pd.array([1, 1, 1, 1], dtype="Int64")
  99. tm.assert_extension_array_equal(result, expected)
  100. result = pd.NA**a
  101. expected = pd.array([1, None, None, None], dtype="Int64")
  102. tm.assert_extension_array_equal(result, expected)
  103. result = np.nan**a
  104. expected = FloatingArray(
  105. np.array([1, np.nan, np.nan, np.nan], dtype="float64"),
  106. np.array([False, False, True, False]),
  107. )
  108. tm.assert_extension_array_equal(result, expected)
  109. def test_pow_array():
  110. a = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
  111. b = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
  112. result = a**b
  113. expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])
  114. tm.assert_extension_array_equal(result, expected)
  115. def test_rpow_one_to_na():
  116. # https://github.com/pandas-dev/pandas/issues/22022
  117. # https://github.com/pandas-dev/pandas/issues/29997
  118. arr = pd.array([np.nan, np.nan], dtype="Int64")
  119. result = np.array([1.0, 2.0]) ** arr
  120. expected = pd.array([1.0, np.nan], dtype="Float64")
  121. tm.assert_extension_array_equal(result, expected)
  122. @pytest.mark.parametrize("other", [0, 0.5])
  123. def test_numpy_zero_dim_ndarray(other):
  124. arr = pd.array([1, None, 2])
  125. result = arr + np.array(other)
  126. expected = arr + other
  127. tm.assert_equal(result, expected)
# Test generic characteristics / errors
# -----------------------------------------------------------------------------

  130. def test_error_invalid_values(data, all_arithmetic_operators):
  131. op = all_arithmetic_operators
  132. s = pd.Series(data)
  133. ops = getattr(s, op)
  134. # invalid scalars
  135. msg = "|".join(
  136. [
  137. r"can only perform ops with numeric values",
  138. r"IntegerArray cannot perform the operation mod",
  139. r"unsupported operand type",
  140. r"can only concatenate str \(not \"int\"\) to str",
  141. "not all arguments converted during string",
  142. "ufunc '.*' not supported for the input types, and the inputs could not",
  143. "ufunc '.*' did not contain a loop with signature matching types",
  144. "Addition/subtraction of integers and integer-arrays with Timestamp",
  145. ]
  146. )
  147. with pytest.raises(TypeError, match=msg):
  148. ops("foo")
  149. with pytest.raises(TypeError, match=msg):
  150. ops(pd.Timestamp("20180101"))
  151. # invalid array-likes
  152. str_ser = pd.Series("foo", index=s.index)
  153. # with pytest.raises(TypeError, match=msg):
  154. if all_arithmetic_operators in [
  155. "__mul__",
  156. "__rmul__",
  157. ]: # (data[~data.isna()] >= 0).all():
  158. res = ops(str_ser)
  159. expected = pd.Series(["foo" * x for x in data], index=s.index)
  160. tm.assert_series_equal(res, expected)
  161. else:
  162. with pytest.raises(TypeError, match=msg):
  163. ops(str_ser)
  164. msg = "|".join(
  165. [
  166. "can only perform ops with numeric values",
  167. "cannot perform .* with this index type: DatetimeArray",
  168. "Addition/subtraction of integers and integer-arrays "
  169. "with DatetimeArray is no longer supported. *",
  170. "unsupported operand type",
  171. r"can only concatenate str \(not \"int\"\) to str",
  172. "not all arguments converted during string",
  173. "cannot subtract DatetimeArray from ndarray",
  174. ]
  175. )
  176. with pytest.raises(TypeError, match=msg):
  177. ops(pd.Series(pd.date_range("20180101", periods=len(s))))
# Various
# -----------------------------------------------------------------------------

# TODO test unsigned overflow

  181. def test_arith_coerce_scalar(data, all_arithmetic_operators):
  182. op = tm.get_op_from_name(all_arithmetic_operators)
  183. s = pd.Series(data)
  184. other = 0.01
  185. result = op(s, other)
  186. expected = op(s.astype(float), other)
  187. expected = expected.astype("Float64")
  188. # rmod results in NaN that wasn't NA in original nullable Series -> unmask it
  189. if all_arithmetic_operators == "__rmod__":
  190. mask = (s == 0).fillna(False).to_numpy(bool)
  191. expected.array._mask[mask] = False
  192. tm.assert_series_equal(result, expected)
  193. @pytest.mark.parametrize("other", [1.0, np.array(1.0)])
  194. def test_arithmetic_conversion(all_arithmetic_operators, other):
  195. # if we have a float operand we should have a float result
  196. # if that is equal to an integer
  197. op = tm.get_op_from_name(all_arithmetic_operators)
  198. s = pd.Series([1, 2, 3], dtype="Int64")
  199. result = op(s, other)
  200. assert result.dtype == "Float64"
  201. def test_cross_type_arithmetic():
  202. df = pd.DataFrame(
  203. {
  204. "A": pd.Series([1, 2, np.nan], dtype="Int64"),
  205. "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
  206. "C": [1, 2, 3],
  207. }
  208. )
  209. result = df.A + df.C
  210. expected = pd.Series([2, 4, np.nan], dtype="Int64")
  211. tm.assert_series_equal(result, expected)
  212. result = (df.A + df.C) * 3 == 12
  213. expected = pd.Series([False, True, None], dtype="boolean")
  214. tm.assert_series_equal(result, expected)
  215. result = df.A + df.B
  216. expected = pd.Series([2, np.nan, np.nan], dtype="Int64")
  217. tm.assert_series_equal(result, expected)
  218. @pytest.mark.parametrize("op", ["mean"])
  219. def test_reduce_to_float(op):
  220. # some reduce ops always return float, even if the result
  221. # is a rounded number
  222. df = pd.DataFrame(
  223. {
  224. "A": ["a", "b", "b"],
  225. "B": [1, None, 3],
  226. "C": pd.array([1, None, 3], dtype="Int64"),
  227. }
  228. )
  229. # op
  230. result = getattr(df.C, op)()
  231. assert isinstance(result, float)
  232. # groupby
  233. result = getattr(df.groupby("A"), op)()
  234. expected = pd.DataFrame(
  235. {"B": np.array([1.0, 3.0]), "C": pd.array([1, 3], dtype="Float64")},
  236. index=pd.Index(["a", "b"], name="A"),
  237. )
  238. tm.assert_frame_equal(result, expected)
  239. @pytest.mark.parametrize(
  240. "source, neg_target, abs_target",
  241. [
  242. ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
  243. ([1, 2, None], [-1, -2, None], [1, 2, None]),
  244. ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
  245. ],
  246. )
  247. def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
  248. dtype = any_signed_int_ea_dtype
  249. arr = pd.array(source, dtype=dtype)
  250. neg_result, pos_result, abs_result = -arr, +arr, abs(arr)
  251. neg_target = pd.array(neg_target, dtype=dtype)
  252. abs_target = pd.array(abs_target, dtype=dtype)
  253. tm.assert_extension_array_equal(neg_result, neg_target)
  254. tm.assert_extension_array_equal(pos_result, arr)
  255. assert not tm.shares_memory(pos_result, arr)
  256. tm.assert_extension_array_equal(abs_result, abs_target)
  257. def test_values_multiplying_large_series_by_NA():
  258. # GH#33701
  259. result = pd.NA * pd.Series(np.zeros(10001))
  260. expected = pd.Series([pd.NA] * 10001)
  261. tm.assert_series_equal(result, expected)
  262. def test_bitwise(dtype):
  263. left = pd.array([1, None, 3, 4], dtype=dtype)
  264. right = pd.array([None, 3, 5, 4], dtype=dtype)
  265. result = left | right
  266. expected = pd.array([None, None, 3 | 5, 4 | 4], dtype=dtype)
  267. tm.assert_extension_array_equal(result, expected)
  268. result = left & right
  269. expected = pd.array([None, None, 3 & 5, 4 & 4], dtype=dtype)
  270. tm.assert_extension_array_equal(result, expected)
  271. result = left ^ right
  272. expected = pd.array([None, None, 3 ^ 5, 4 ^ 4], dtype=dtype)
  273. tm.assert_extension_array_equal(result, expected)
  274. # TODO: desired behavior when operating with boolean? defer?
  275. floats = right.astype("Float64")
  276. with pytest.raises(TypeError, match="unsupported operand type"):
  277. left | floats
  278. with pytest.raises(TypeError, match="unsupported operand type"):
  279. left & floats
  280. with pytest.raises(TypeError, match="unsupported operand type"):
  281. left ^ floats