test_numpy.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456
  1. """
  2. This file contains a minimal set of tests for compliance with the extension
  3. array interface test suite, and should contain no other tests.
  4. The test suite for the full functionality of the array is located in
  5. `pandas/tests/arrays/`.
  6. The tests in this file are inherited from the BaseExtensionTests, and only
  7. minimal tweaks should be applied to get the tests passing (by overwriting a
  8. parent method).
  9. Additional tests should either be added to one of the BaseExtensionTests
  10. classes (if they are relevant for the extension interface for all dtypes), or
  11. be added to the array-specific tests in `pandas/tests/arrays/`.
  12. Note: we do not bother with base.BaseIndexTests because PandasArray
  13. will never be held in an Index.
  14. """
  15. import numpy as np
  16. import pytest
  17. from pandas.core.dtypes.cast import can_hold_element
  18. from pandas.core.dtypes.dtypes import (
  19. ExtensionDtype,
  20. PandasDtype,
  21. )
  22. import pandas as pd
  23. import pandas._testing as tm
  24. from pandas.api.types import is_object_dtype
  25. from pandas.core.arrays.numpy_ import PandasArray
  26. from pandas.core.internals import blocks
  27. from pandas.tests.extension import base
  28. def _can_hold_element_patched(obj, element) -> bool:
  29. if isinstance(element, PandasArray):
  30. element = element.to_numpy()
  31. return can_hold_element(obj, element)
  32. orig_assert_attr_equal = tm.assert_attr_equal
  33. def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
  34. """
  35. patch tm.assert_attr_equal so PandasDtype("object") is closed enough to
  36. np.dtype("object")
  37. """
  38. if attr == "dtype":
  39. lattr = getattr(left, "dtype", None)
  40. rattr = getattr(right, "dtype", None)
  41. if isinstance(lattr, PandasDtype) and not isinstance(rattr, PandasDtype):
  42. left = left.astype(lattr.numpy_dtype)
  43. elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype):
  44. right = right.astype(rattr.numpy_dtype)
  45. orig_assert_attr_equal(attr, left, right, obj)
  46. @pytest.fixture(params=["float", "object"])
  47. def dtype(request):
  48. return PandasDtype(np.dtype(request.param))
  49. @pytest.fixture
  50. def allow_in_pandas(monkeypatch):
  51. """
  52. A monkeypatch to tells pandas to let us in.
  53. By default, passing a PandasArray to an index / series / frame
  54. constructor will unbox that PandasArray to an ndarray, and treat
  55. it as a non-EA column. We don't want people using EAs without
  56. reason.
  57. The mechanism for this is a check against ABCPandasArray
  58. in each constructor.
  59. But, for testing, we need to allow them in pandas. So we patch
  60. the _typ of PandasArray, so that we evade the ABCPandasArray
  61. check.
  62. """
  63. with monkeypatch.context() as m:
  64. m.setattr(PandasArray, "_typ", "extension")
  65. m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
  66. m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
  67. yield
  68. @pytest.fixture
  69. def data(allow_in_pandas, dtype):
  70. if dtype.numpy_dtype == "object":
  71. return pd.Series([(i,) for i in range(100)]).array
  72. return PandasArray(np.arange(1, 101, dtype=dtype._dtype))
  73. @pytest.fixture
  74. def data_missing(allow_in_pandas, dtype):
  75. if dtype.numpy_dtype == "object":
  76. return PandasArray(np.array([np.nan, (1,)], dtype=object))
  77. return PandasArray(np.array([np.nan, 1.0]))
  78. @pytest.fixture
  79. def na_value():
  80. return np.nan
  81. @pytest.fixture
  82. def na_cmp():
  83. def cmp(a, b):
  84. return np.isnan(a) and np.isnan(b)
  85. return cmp
  86. @pytest.fixture
  87. def data_for_sorting(allow_in_pandas, dtype):
  88. """Length-3 array with a known sort order.
  89. This should be three items [B, C, A] with
  90. A < B < C
  91. """
  92. if dtype.numpy_dtype == "object":
  93. # Use an empty tuple for first element, then remove,
  94. # to disable np.array's shape inference.
  95. return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
  96. return PandasArray(np.array([1, 2, 0]))
  97. @pytest.fixture
  98. def data_missing_for_sorting(allow_in_pandas, dtype):
  99. """Length-3 array with a known sort order.
  100. This should be three items [B, NA, A] with
  101. A < B and NA missing.
  102. """
  103. if dtype.numpy_dtype == "object":
  104. return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object))
  105. return PandasArray(np.array([1, np.nan, 0]))
  106. @pytest.fixture
  107. def data_for_grouping(allow_in_pandas, dtype):
  108. """Data for factorization, grouping, and unique tests.
  109. Expected to be like [B, B, NA, NA, A, A, B, C]
  110. Where A < B < C and NA is missing
  111. """
  112. if dtype.numpy_dtype == "object":
  113. a, b, c = (1,), (2,), (3,)
  114. else:
  115. a, b, c = np.arange(3)
  116. return PandasArray(
  117. np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
  118. )
  119. @pytest.fixture
  120. def skip_numpy_object(dtype, request):
  121. """
  122. Tests for PandasArray with nested data. Users typically won't create
  123. these objects via `pd.array`, but they can show up through `.array`
  124. on a Series with nested data. Many of the base tests fail, as they aren't
  125. appropriate for nested data.
  126. This fixture allows these tests to be skipped when used as a usefixtures
  127. marker to either an individual test or a test class.
  128. """
  129. if dtype == "object":
  130. mark = pytest.mark.xfail(reason="Fails for object dtype")
  131. request.node.add_marker(mark)
  132. skip_nested = pytest.mark.usefixtures("skip_numpy_object")
  133. class BaseNumPyTests:
  134. @classmethod
  135. def assert_series_equal(cls, left, right, *args, **kwargs):
  136. # base class tests hard-code expected values with numpy dtypes,
  137. # whereas we generally want the corresponding PandasDtype
  138. if (
  139. isinstance(right, pd.Series)
  140. and not isinstance(right.dtype, ExtensionDtype)
  141. and isinstance(left.dtype, PandasDtype)
  142. ):
  143. right = right.astype(PandasDtype(right.dtype))
  144. return tm.assert_series_equal(left, right, *args, **kwargs)
  145. class TestCasting(BaseNumPyTests, base.BaseCastingTests):
  146. pass
  147. class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests):
  148. @pytest.mark.skip(reason="We don't register our dtype")
  149. # We don't want to register. This test should probably be split in two.
  150. def test_from_dtype(self, data):
  151. pass
  152. @skip_nested
  153. def test_series_constructor_scalar_with_index(self, data, dtype):
  154. # ValueError: Length of passed values is 1, index implies 3.
  155. super().test_series_constructor_scalar_with_index(data, dtype)
  156. class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
  157. def test_check_dtype(self, data, request):
  158. if data.dtype.numpy_dtype == "object":
  159. request.node.add_marker(
  160. pytest.mark.xfail(
  161. reason=f"PandasArray expectedly clashes with a "
  162. f"NumPy name: {data.dtype.numpy_dtype}"
  163. )
  164. )
  165. super().test_check_dtype(data)
  166. def test_is_not_object_type(self, dtype, request):
  167. if dtype.numpy_dtype == "object":
  168. # Different from BaseDtypeTests.test_is_not_object_type
  169. # because PandasDtype(object) is an object type
  170. assert is_object_dtype(dtype)
  171. else:
  172. super().test_is_not_object_type(dtype)
  173. class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
  174. @skip_nested
  175. def test_getitem_scalar(self, data):
  176. # AssertionError
  177. super().test_getitem_scalar(data)
  178. class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
  179. pass
  180. class TestInterface(BaseNumPyTests, base.BaseInterfaceTests):
  181. @skip_nested
  182. def test_array_interface(self, data):
  183. # NumPy array shape inference
  184. super().test_array_interface(data)
  185. class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
  186. @skip_nested
  187. def test_shift_fill_value(self, data):
  188. # np.array shape inference. Shift implementation fails.
  189. super().test_shift_fill_value(data)
  190. @skip_nested
  191. def test_fillna_copy_frame(self, data_missing):
  192. # The "scalar" for this array isn't a scalar.
  193. super().test_fillna_copy_frame(data_missing)
  194. @skip_nested
  195. def test_fillna_copy_series(self, data_missing):
  196. # The "scalar" for this array isn't a scalar.
  197. super().test_fillna_copy_series(data_missing)
  198. @skip_nested
  199. def test_searchsorted(self, data_for_sorting, as_series):
  200. # Test setup fails.
  201. super().test_searchsorted(data_for_sorting, as_series)
  202. @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype")
  203. def test_diff(self, data, periods):
  204. return super().test_diff(data, periods)
  205. def test_insert(self, data, request):
  206. if data.dtype.numpy_dtype == object:
  207. mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate")
  208. request.node.add_marker(mark)
  209. super().test_insert(data)
  210. @skip_nested
  211. def test_insert_invalid(self, data, invalid_scalar):
  212. # PandasArray[object] can hold anything, so skip
  213. super().test_insert_invalid(data, invalid_scalar)
  214. class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests):
  215. divmod_exc = None
  216. series_scalar_exc = None
  217. frame_scalar_exc = None
  218. series_array_exc = None
  219. @skip_nested
  220. def test_divmod(self, data):
  221. super().test_divmod(data)
  222. @skip_nested
  223. def test_divmod_series_array(self, data):
  224. ser = pd.Series(data)
  225. self._check_divmod_op(ser, divmod, data, exc=None)
  226. @skip_nested
  227. def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
  228. super().test_arith_series_with_scalar(data, all_arithmetic_operators)
  229. def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
  230. opname = all_arithmetic_operators
  231. if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
  232. mark = pytest.mark.xfail(reason="Fails for object dtype")
  233. request.node.add_marker(mark)
  234. super().test_arith_series_with_array(data, all_arithmetic_operators)
  235. @skip_nested
  236. def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
  237. super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
  238. class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
  239. pass
  240. class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
  241. def check_reduce(self, s, op_name, skipna):
  242. result = getattr(s, op_name)(skipna=skipna)
  243. # avoid coercing int -> float. Just cast to the actual numpy type.
  244. expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna)
  245. tm.assert_almost_equal(result, expected)
  246. @pytest.mark.parametrize("skipna", [True, False])
  247. def test_reduce_series(self, data, all_boolean_reductions, skipna):
  248. super().test_reduce_series(data, all_boolean_reductions, skipna)
  249. @skip_nested
  250. class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
  251. pass
  252. class TestMissing(BaseNumPyTests, base.BaseMissingTests):
  253. @skip_nested
  254. def test_fillna_series(self, data_missing):
  255. # Non-scalar "scalar" values.
  256. super().test_fillna_series(data_missing)
  257. @skip_nested
  258. def test_fillna_frame(self, data_missing):
  259. # Non-scalar "scalar" values.
  260. super().test_fillna_frame(data_missing)
  261. class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
  262. @pytest.mark.parametrize(
  263. "in_frame",
  264. [
  265. True,
  266. pytest.param(
  267. False,
  268. marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"),
  269. ),
  270. ],
  271. )
  272. def test_concat(self, data, in_frame):
  273. super().test_concat(data, in_frame)
  274. class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
  275. @skip_nested
  276. def test_setitem_invalid(self, data, invalid_scalar):
  277. # object dtype can hold anything, so doesn't raise
  278. super().test_setitem_invalid(data, invalid_scalar)
  279. @skip_nested
  280. def test_setitem_sequence_broadcasts(self, data, box_in_series):
  281. # ValueError: cannot set using a list-like indexer with a different
  282. # length than the value
  283. super().test_setitem_sequence_broadcasts(data, box_in_series)
  284. @skip_nested
  285. @pytest.mark.parametrize("setter", ["loc", None])
  286. def test_setitem_mask_broadcast(self, data, setter):
  287. # ValueError: cannot set using a list-like indexer with a different
  288. # length than the value
  289. super().test_setitem_mask_broadcast(data, setter)
  290. @skip_nested
  291. def test_setitem_scalar_key_sequence_raise(self, data):
  292. # Failed: DID NOT RAISE <class 'ValueError'>
  293. super().test_setitem_scalar_key_sequence_raise(data)
  294. # TODO: there is some issue with PandasArray, therefore,
  295. # skip the setitem test for now, and fix it later (GH 31446)
  296. @skip_nested
  297. @pytest.mark.parametrize(
  298. "mask",
  299. [
  300. np.array([True, True, True, False, False]),
  301. pd.array([True, True, True, False, False], dtype="boolean"),
  302. ],
  303. ids=["numpy-array", "boolean-array"],
  304. )
  305. def test_setitem_mask(self, data, mask, box_in_series):
  306. super().test_setitem_mask(data, mask, box_in_series)
  307. @skip_nested
  308. @pytest.mark.parametrize(
  309. "idx",
  310. [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
  311. ids=["list", "integer-array", "numpy-array"],
  312. )
  313. def test_setitem_integer_array(self, data, idx, box_in_series):
  314. super().test_setitem_integer_array(data, idx, box_in_series)
  315. @pytest.mark.parametrize(
  316. "idx, box_in_series",
  317. [
  318. ([0, 1, 2, pd.NA], False),
  319. pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail),
  320. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  321. (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
  322. ],
  323. ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
  324. )
  325. def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
  326. super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)
  327. @skip_nested
  328. def test_setitem_slice(self, data, box_in_series):
  329. super().test_setitem_slice(data, box_in_series)
  330. @skip_nested
  331. def test_setitem_loc_iloc_slice(self, data):
  332. super().test_setitem_loc_iloc_slice(data)
  333. def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
  334. # https://github.com/pandas-dev/pandas/issues/32395
  335. df = expected = pd.DataFrame({"data": pd.Series(data)})
  336. result = pd.DataFrame(index=df.index)
  337. # because result has object dtype, the attempt to do setting inplace
  338. # is successful, and object dtype is retained
  339. key = full_indexer(df)
  340. result.loc[key, "data"] = df["data"]
  341. # base class method has expected = df; PandasArray behaves oddly because
  342. # we patch _typ for these tests.
  343. if data.dtype.numpy_dtype != object:
  344. if not isinstance(key, slice) or key != slice(None):
  345. expected = pd.DataFrame({"data": data.to_numpy()})
  346. self.assert_frame_equal(result, expected)
  347. @skip_nested
  348. class TestParsing(BaseNumPyTests, base.BaseParsingTests):
  349. pass
  350. class Test2DCompat(BaseNumPyTests, base.NDArrayBacked2DTests):
  351. pass