test_ufunc.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. from collections import deque
  2. import re
  3. import string
  4. import numpy as np
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. import pandas as pd
  8. import pandas._testing as tm
  9. from pandas.arrays import SparseArray
  10. BINARY_UFUNCS = [np.add, np.logaddexp] # dunder op
  11. SPARSE = [True, False]
  12. SPARSE_IDS = ["sparse", "dense"]
  13. @pytest.fixture
  14. def arrays_for_binary_ufunc():
  15. """
  16. A pair of random, length-100 integer-dtype arrays, that are mostly 0.
  17. """
  18. a1 = np.random.randint(0, 10, 100, dtype="int64")
  19. a2 = np.random.randint(0, 10, 100, dtype="int64")
  20. a1[::3] = 0
  21. a2[::4] = 0
  22. return a1, a2
  23. @pytest.mark.parametrize("ufunc", [np.positive, np.floor, np.exp])
  24. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  25. def test_unary_ufunc(ufunc, sparse):
  26. # Test that ufunc(pd.Series) == pd.Series(ufunc)
  27. arr = np.random.randint(0, 10, 10, dtype="int64")
  28. arr[::2] = 0
  29. if sparse:
  30. arr = SparseArray(arr, dtype=pd.SparseDtype("int64", 0))
  31. index = list(string.ascii_letters[:10])
  32. name = "name"
  33. series = pd.Series(arr, index=index, name=name)
  34. result = ufunc(series)
  35. expected = pd.Series(ufunc(arr), index=index, name=name)
  36. tm.assert_series_equal(result, expected)
  37. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  38. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  39. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  40. def test_binary_ufunc_with_array(flip, sparse, ufunc, arrays_for_binary_ufunc):
  41. # Test that ufunc(pd.Series(a), array) == pd.Series(ufunc(a, b))
  42. a1, a2 = arrays_for_binary_ufunc
  43. if sparse:
  44. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  45. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  46. name = "name" # op(pd.Series, array) preserves the name.
  47. series = pd.Series(a1, name=name)
  48. other = a2
  49. array_args = (a1, a2)
  50. series_args = (series, other) # ufunc(series, array)
  51. if flip:
  52. array_args = reversed(array_args)
  53. series_args = reversed(series_args) # ufunc(array, series)
  54. expected = pd.Series(ufunc(*array_args), name=name)
  55. result = ufunc(*series_args)
  56. tm.assert_series_equal(result, expected)
  57. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  58. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  59. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  60. def test_binary_ufunc_with_index(flip, sparse, ufunc, arrays_for_binary_ufunc):
  61. # Test that
  62. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  63. # * ufunc(Index, pd.Series) dispatches to pd.Series (returns a pd.Series)
  64. a1, a2 = arrays_for_binary_ufunc
  65. if sparse:
  66. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  67. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  68. name = "name" # op(pd.Series, array) preserves the name.
  69. series = pd.Series(a1, name=name)
  70. other = pd.Index(a2, name=name).astype("int64")
  71. array_args = (a1, a2)
  72. series_args = (series, other) # ufunc(series, array)
  73. if flip:
  74. array_args = reversed(array_args)
  75. series_args = reversed(series_args) # ufunc(array, series)
  76. expected = pd.Series(ufunc(*array_args), name=name)
  77. result = ufunc(*series_args)
  78. tm.assert_series_equal(result, expected)
  79. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  80. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  81. @pytest.mark.parametrize("shuffle", [True, False], ids=["unaligned", "aligned"])
  82. @pytest.mark.parametrize("flip", [True, False], ids=["flipped", "straight"])
  83. def test_binary_ufunc_with_series(
  84. flip, shuffle, sparse, ufunc, arrays_for_binary_ufunc
  85. ):
  86. # Test that
  87. # * func(pd.Series(a), pd.Series(b)) == pd.Series(ufunc(a, b))
  88. # with alignment between the indices
  89. a1, a2 = arrays_for_binary_ufunc
  90. if sparse:
  91. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  92. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  93. name = "name" # op(pd.Series, array) preserves the name.
  94. series = pd.Series(a1, name=name)
  95. other = pd.Series(a2, name=name)
  96. idx = np.random.permutation(len(a1))
  97. if shuffle:
  98. other = other.take(idx)
  99. if flip:
  100. index = other.align(series)[0].index
  101. else:
  102. index = series.align(other)[0].index
  103. else:
  104. index = series.index
  105. array_args = (a1, a2)
  106. series_args = (series, other) # ufunc(series, array)
  107. if flip:
  108. array_args = tuple(reversed(array_args))
  109. series_args = tuple(reversed(series_args)) # ufunc(array, series)
  110. expected = pd.Series(ufunc(*array_args), index=index, name=name)
  111. result = ufunc(*series_args)
  112. tm.assert_series_equal(result, expected)
  113. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  114. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  115. @pytest.mark.parametrize("flip", [True, False])
  116. def test_binary_ufunc_scalar(ufunc, sparse, flip, arrays_for_binary_ufunc):
  117. # Test that
  118. # * ufunc(pd.Series, scalar) == pd.Series(ufunc(array, scalar))
  119. # * ufunc(pd.Series, scalar) == ufunc(scalar, pd.Series)
  120. arr, _ = arrays_for_binary_ufunc
  121. if sparse:
  122. arr = SparseArray(arr)
  123. other = 2
  124. series = pd.Series(arr, name="name")
  125. series_args = (series, other)
  126. array_args = (arr, other)
  127. if flip:
  128. series_args = tuple(reversed(series_args))
  129. array_args = tuple(reversed(array_args))
  130. expected = pd.Series(ufunc(*array_args), name="name")
  131. result = ufunc(*series_args)
  132. tm.assert_series_equal(result, expected)
  133. @pytest.mark.parametrize("ufunc", [np.divmod]) # TODO: np.modf, np.frexp
  134. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  135. @pytest.mark.parametrize("shuffle", [True, False])
  136. @pytest.mark.filterwarnings("ignore:divide by zero:RuntimeWarning")
  137. def test_multiple_output_binary_ufuncs(ufunc, sparse, shuffle, arrays_for_binary_ufunc):
  138. # Test that
  139. # the same conditions from binary_ufunc_scalar apply to
  140. # ufuncs with multiple outputs.
  141. a1, a2 = arrays_for_binary_ufunc
  142. # work around https://github.com/pandas-dev/pandas/issues/26987
  143. a1[a1 == 0] = 1
  144. a2[a2 == 0] = 1
  145. if sparse:
  146. a1 = SparseArray(a1, dtype=pd.SparseDtype("int64", 0))
  147. a2 = SparseArray(a2, dtype=pd.SparseDtype("int64", 0))
  148. s1 = pd.Series(a1)
  149. s2 = pd.Series(a2)
  150. if shuffle:
  151. # ensure we align before applying the ufunc
  152. s2 = s2.sample(frac=1)
  153. expected = ufunc(a1, a2)
  154. assert isinstance(expected, tuple)
  155. result = ufunc(s1, s2)
  156. assert isinstance(result, tuple)
  157. tm.assert_series_equal(result[0], pd.Series(expected[0]))
  158. tm.assert_series_equal(result[1], pd.Series(expected[1]))
  159. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  160. def test_multiple_output_ufunc(sparse, arrays_for_binary_ufunc):
  161. # Test that the same conditions from unary input apply to multi-output
  162. # ufuncs
  163. arr, _ = arrays_for_binary_ufunc
  164. if sparse:
  165. arr = SparseArray(arr)
  166. series = pd.Series(arr, name="name")
  167. result = np.modf(series)
  168. expected = np.modf(arr)
  169. assert isinstance(result, tuple)
  170. assert isinstance(expected, tuple)
  171. tm.assert_series_equal(result[0], pd.Series(expected[0], name="name"))
  172. tm.assert_series_equal(result[1], pd.Series(expected[1], name="name"))
  173. @pytest.mark.parametrize("sparse", SPARSE, ids=SPARSE_IDS)
  174. @pytest.mark.parametrize("ufunc", BINARY_UFUNCS)
  175. def test_binary_ufunc_drops_series_name(ufunc, sparse, arrays_for_binary_ufunc):
  176. # Drop the names when they differ.
  177. a1, a2 = arrays_for_binary_ufunc
  178. s1 = pd.Series(a1, name="a")
  179. s2 = pd.Series(a2, name="b")
  180. result = ufunc(s1, s2)
  181. assert result.name is None
  182. def test_object_series_ok():
  183. class Dummy:
  184. def __init__(self, value) -> None:
  185. self.value = value
  186. def __add__(self, other):
  187. return self.value + other.value
  188. arr = np.array([Dummy(0), Dummy(1)])
  189. ser = pd.Series(arr)
  190. tm.assert_series_equal(np.add(ser, ser), pd.Series(np.add(ser, arr)))
  191. tm.assert_series_equal(np.add(ser, Dummy(1)), pd.Series(np.add(ser, Dummy(1))))
  192. @pytest.fixture(
  193. params=[
  194. pd.array([1, 3, 2], dtype=np.int64),
  195. pd.array([1, 3, 2], dtype="Int64"),
  196. pd.array([1, 3, 2], dtype="Float32"),
  197. pd.array([1, 10, 2], dtype="Sparse[int]"),
  198. pd.to_datetime(["2000", "2010", "2001"]),
  199. pd.to_datetime(["2000", "2010", "2001"]).tz_localize("CET"),
  200. pd.to_datetime(["2000", "2010", "2001"]).to_period(freq="D"),
  201. pd.to_timedelta(["1 Day", "3 Days", "2 Days"]),
  202. pd.IntervalIndex([pd.Interval(0, 1), pd.Interval(2, 3), pd.Interval(1, 2)]),
  203. ],
  204. ids=lambda x: str(x.dtype),
  205. )
  206. def values_for_np_reduce(request):
  207. # min/max tests assume that these are monotonic increasing
  208. return request.param
  209. class TestNumpyReductions:
  210. # TODO: cases with NAs, axis kwarg for DataFrame
  211. def test_multiply(self, values_for_np_reduce, box_with_array, request):
  212. box = box_with_array
  213. values = values_for_np_reduce
  214. with tm.assert_produces_warning(None):
  215. obj = box(values)
  216. if isinstance(values, pd.core.arrays.SparseArray):
  217. mark = pytest.mark.xfail(reason="SparseArray has no 'prod'")
  218. request.node.add_marker(mark)
  219. if values.dtype.kind in "iuf":
  220. result = np.multiply.reduce(obj)
  221. if box is pd.DataFrame:
  222. expected = obj.prod(numeric_only=False)
  223. tm.assert_series_equal(result, expected)
  224. elif box is pd.Index:
  225. # Index has no 'prod'
  226. expected = obj._values.prod()
  227. assert result == expected
  228. else:
  229. expected = obj.prod()
  230. assert result == expected
  231. else:
  232. msg = "|".join(
  233. [
  234. "does not support reduction",
  235. "unsupported operand type",
  236. "ufunc 'multiply' cannot use operands",
  237. ]
  238. )
  239. with pytest.raises(TypeError, match=msg):
  240. np.multiply.reduce(obj)
  241. def test_add(self, values_for_np_reduce, box_with_array):
  242. box = box_with_array
  243. values = values_for_np_reduce
  244. with tm.assert_produces_warning(None):
  245. obj = box(values)
  246. if values.dtype.kind in "miuf":
  247. result = np.add.reduce(obj)
  248. if box is pd.DataFrame:
  249. expected = obj.sum(numeric_only=False)
  250. tm.assert_series_equal(result, expected)
  251. elif box is pd.Index:
  252. # Index has no 'sum'
  253. expected = obj._values.sum()
  254. assert result == expected
  255. else:
  256. expected = obj.sum()
  257. assert result == expected
  258. else:
  259. msg = "|".join(
  260. [
  261. "does not support reduction",
  262. "unsupported operand type",
  263. "ufunc 'add' cannot use operands",
  264. ]
  265. )
  266. with pytest.raises(TypeError, match=msg):
  267. np.add.reduce(obj)
  268. def test_max(self, values_for_np_reduce, box_with_array):
  269. box = box_with_array
  270. values = values_for_np_reduce
  271. same_type = True
  272. if box is pd.Index and values.dtype.kind in ["i", "f"]:
  273. # ATM Index casts to object, so we get python ints/floats
  274. same_type = False
  275. with tm.assert_produces_warning(None):
  276. obj = box(values)
  277. result = np.maximum.reduce(obj)
  278. if box is pd.DataFrame:
  279. # TODO: cases with axis kwarg
  280. expected = obj.max(numeric_only=False)
  281. tm.assert_series_equal(result, expected)
  282. else:
  283. expected = values[1]
  284. assert result == expected
  285. if same_type:
  286. # check we have e.g. Timestamp instead of dt64
  287. assert type(result) == type(expected)
  288. def test_min(self, values_for_np_reduce, box_with_array):
  289. box = box_with_array
  290. values = values_for_np_reduce
  291. same_type = True
  292. if box is pd.Index and values.dtype.kind in ["i", "f"]:
  293. # ATM Index casts to object, so we get python ints/floats
  294. same_type = False
  295. with tm.assert_produces_warning(None):
  296. obj = box(values)
  297. result = np.minimum.reduce(obj)
  298. if box is pd.DataFrame:
  299. expected = obj.min(numeric_only=False)
  300. tm.assert_series_equal(result, expected)
  301. else:
  302. expected = values[0]
  303. assert result == expected
  304. if same_type:
  305. # check we have e.g. Timestamp instead of dt64
  306. assert type(result) == type(expected)
  307. @pytest.mark.parametrize("type_", [list, deque, tuple])
  308. def test_binary_ufunc_other_types(type_):
  309. a = pd.Series([1, 2, 3], name="name")
  310. b = type_([3, 4, 5])
  311. result = np.add(a, b)
  312. expected = pd.Series(np.add(a.to_numpy(), b), name="name")
  313. tm.assert_series_equal(result, expected)
  314. def test_object_dtype_ok():
  315. class Thing:
  316. def __init__(self, value) -> None:
  317. self.value = value
  318. def __add__(self, other):
  319. other = getattr(other, "value", other)
  320. return type(self)(self.value + other)
  321. def __eq__(self, other) -> bool:
  322. return type(other) is Thing and self.value == other.value
  323. def __repr__(self) -> str:
  324. return f"Thing({self.value})"
  325. s = pd.Series([Thing(1), Thing(2)])
  326. result = np.add(s, Thing(1))
  327. expected = pd.Series([Thing(2), Thing(3)])
  328. tm.assert_series_equal(result, expected)
  329. def test_outer():
  330. # https://github.com/pandas-dev/pandas/issues/27186
  331. ser = pd.Series([1, 2, 3])
  332. obj = np.array([1, 2, 3])
  333. with pytest.raises(NotImplementedError, match=tm.EMPTY_STRING_PATTERN):
  334. np.subtract.outer(ser, obj)
  335. def test_np_matmul():
  336. # GH26650
  337. df1 = pd.DataFrame(data=[[-1, 1, 10]])
  338. df2 = pd.DataFrame(data=[-1, 1, 10])
  339. expected = pd.DataFrame(data=[102])
  340. result = np.matmul(df1, df2)
  341. tm.assert_frame_equal(expected, result)
  342. def test_array_ufuncs_for_many_arguments():
  343. # GH39853
  344. def add3(x, y, z):
  345. return x + y + z
  346. ufunc = np.frompyfunc(add3, 3, 1)
  347. ser = pd.Series([1, 2])
  348. result = ufunc(ser, ser, 1)
  349. expected = pd.Series([3, 5], dtype=object)
  350. tm.assert_series_equal(result, expected)
  351. df = pd.DataFrame([[1, 2]])
  352. msg = (
  353. "Cannot apply ufunc <ufunc 'add3 (vectorized)'> "
  354. "to mixed DataFrame and Series inputs."
  355. )
  356. with pytest.raises(NotImplementedError, match=re.escape(msg)):
  357. ufunc(ser, ser, df)
  358. # TODO(CoW) see https://github.com/pandas-dev/pandas/pull/51082
  359. @td.skip_copy_on_write_not_yet_implemented
  360. def test_np_fix():
  361. # np.fix is not a ufunc but is composed of several ufunc calls under the hood
  362. # with `out` and `where` keywords
  363. ser = pd.Series([-1.5, -0.5, 0.5, 1.5])
  364. result = np.fix(ser)
  365. expected = pd.Series([-1.0, -0.0, 0.0, 1.0])
  366. tm.assert_series_equal(result, expected)