123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456 |
- """
- This file contains a minimal set of tests for compliance with the extension
- array interface test suite, and should contain no other tests.
- The test suite for the full functionality of the array is located in
- `pandas/tests/arrays/`.
- The tests in this file are inherited from the BaseExtensionTests, and only
- minimal tweaks should be applied to get the tests passing (by overwriting a
- parent method).
- Additional tests should either be added to one of the BaseExtensionTests
- classes (if they are relevant for the extension interface for all dtypes), or
- be added to the array-specific tests in `pandas/tests/arrays/`.
- Note: we do not bother with base.BaseIndexTests because PandasArray
- will never be held in an Index.
- """
- import numpy as np
- import pytest
- from pandas.core.dtypes.cast import can_hold_element
- from pandas.core.dtypes.dtypes import (
- ExtensionDtype,
- PandasDtype,
- )
- import pandas as pd
- import pandas._testing as tm
- from pandas.api.types import is_object_dtype
- from pandas.core.arrays.numpy_ import PandasArray
- from pandas.core.internals import blocks
- from pandas.tests.extension import base
- def _can_hold_element_patched(obj, element) -> bool:
- if isinstance(element, PandasArray):
- element = element.to_numpy()
- return can_hold_element(obj, element)
- orig_assert_attr_equal = tm.assert_attr_equal
- def _assert_attr_equal(attr: str, left, right, obj: str = "Attributes"):
- """
- patch tm.assert_attr_equal so PandasDtype("object") is closed enough to
- np.dtype("object")
- """
- if attr == "dtype":
- lattr = getattr(left, "dtype", None)
- rattr = getattr(right, "dtype", None)
- if isinstance(lattr, PandasDtype) and not isinstance(rattr, PandasDtype):
- left = left.astype(lattr.numpy_dtype)
- elif isinstance(rattr, PandasDtype) and not isinstance(lattr, PandasDtype):
- right = right.astype(rattr.numpy_dtype)
- orig_assert_attr_equal(attr, left, right, obj)
- @pytest.fixture(params=["float", "object"])
- def dtype(request):
- return PandasDtype(np.dtype(request.param))
- @pytest.fixture
- def allow_in_pandas(monkeypatch):
- """
- A monkeypatch to tells pandas to let us in.
- By default, passing a PandasArray to an index / series / frame
- constructor will unbox that PandasArray to an ndarray, and treat
- it as a non-EA column. We don't want people using EAs without
- reason.
- The mechanism for this is a check against ABCPandasArray
- in each constructor.
- But, for testing, we need to allow them in pandas. So we patch
- the _typ of PandasArray, so that we evade the ABCPandasArray
- check.
- """
- with monkeypatch.context() as m:
- m.setattr(PandasArray, "_typ", "extension")
- m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
- m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
- yield
- @pytest.fixture
- def data(allow_in_pandas, dtype):
- if dtype.numpy_dtype == "object":
- return pd.Series([(i,) for i in range(100)]).array
- return PandasArray(np.arange(1, 101, dtype=dtype._dtype))
- @pytest.fixture
- def data_missing(allow_in_pandas, dtype):
- if dtype.numpy_dtype == "object":
- return PandasArray(np.array([np.nan, (1,)], dtype=object))
- return PandasArray(np.array([np.nan, 1.0]))
- @pytest.fixture
- def na_value():
- return np.nan
- @pytest.fixture
- def na_cmp():
- def cmp(a, b):
- return np.isnan(a) and np.isnan(b)
- return cmp
- @pytest.fixture
- def data_for_sorting(allow_in_pandas, dtype):
- """Length-3 array with a known sort order.
- This should be three items [B, C, A] with
- A < B < C
- """
- if dtype.numpy_dtype == "object":
- # Use an empty tuple for first element, then remove,
- # to disable np.array's shape inference.
- return PandasArray(np.array([(), (2,), (3,), (1,)], dtype=object)[1:])
- return PandasArray(np.array([1, 2, 0]))
- @pytest.fixture
- def data_missing_for_sorting(allow_in_pandas, dtype):
- """Length-3 array with a known sort order.
- This should be three items [B, NA, A] with
- A < B and NA missing.
- """
- if dtype.numpy_dtype == "object":
- return PandasArray(np.array([(1,), np.nan, (0,)], dtype=object))
- return PandasArray(np.array([1, np.nan, 0]))
- @pytest.fixture
- def data_for_grouping(allow_in_pandas, dtype):
- """Data for factorization, grouping, and unique tests.
- Expected to be like [B, B, NA, NA, A, A, B, C]
- Where A < B < C and NA is missing
- """
- if dtype.numpy_dtype == "object":
- a, b, c = (1,), (2,), (3,)
- else:
- a, b, c = np.arange(3)
- return PandasArray(
- np.array([b, b, np.nan, np.nan, a, a, b, c], dtype=dtype.numpy_dtype)
- )
- @pytest.fixture
- def skip_numpy_object(dtype, request):
- """
- Tests for PandasArray with nested data. Users typically won't create
- these objects via `pd.array`, but they can show up through `.array`
- on a Series with nested data. Many of the base tests fail, as they aren't
- appropriate for nested data.
- This fixture allows these tests to be skipped when used as a usefixtures
- marker to either an individual test or a test class.
- """
- if dtype == "object":
- mark = pytest.mark.xfail(reason="Fails for object dtype")
- request.node.add_marker(mark)
- skip_nested = pytest.mark.usefixtures("skip_numpy_object")
- class BaseNumPyTests:
- @classmethod
- def assert_series_equal(cls, left, right, *args, **kwargs):
- # base class tests hard-code expected values with numpy dtypes,
- # whereas we generally want the corresponding PandasDtype
- if (
- isinstance(right, pd.Series)
- and not isinstance(right.dtype, ExtensionDtype)
- and isinstance(left.dtype, PandasDtype)
- ):
- right = right.astype(PandasDtype(right.dtype))
- return tm.assert_series_equal(left, right, *args, **kwargs)
- class TestCasting(BaseNumPyTests, base.BaseCastingTests):
- pass
- class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests):
- @pytest.mark.skip(reason="We don't register our dtype")
- # We don't want to register. This test should probably be split in two.
- def test_from_dtype(self, data):
- pass
- @skip_nested
- def test_series_constructor_scalar_with_index(self, data, dtype):
- # ValueError: Length of passed values is 1, index implies 3.
- super().test_series_constructor_scalar_with_index(data, dtype)
- class TestDtype(BaseNumPyTests, base.BaseDtypeTests):
- def test_check_dtype(self, data, request):
- if data.dtype.numpy_dtype == "object":
- request.node.add_marker(
- pytest.mark.xfail(
- reason=f"PandasArray expectedly clashes with a "
- f"NumPy name: {data.dtype.numpy_dtype}"
- )
- )
- super().test_check_dtype(data)
- def test_is_not_object_type(self, dtype, request):
- if dtype.numpy_dtype == "object":
- # Different from BaseDtypeTests.test_is_not_object_type
- # because PandasDtype(object) is an object type
- assert is_object_dtype(dtype)
- else:
- super().test_is_not_object_type(dtype)
- class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
- @skip_nested
- def test_getitem_scalar(self, data):
- # AssertionError
- super().test_getitem_scalar(data)
- class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests):
- pass
- class TestInterface(BaseNumPyTests, base.BaseInterfaceTests):
- @skip_nested
- def test_array_interface(self, data):
- # NumPy array shape inference
- super().test_array_interface(data)
- class TestMethods(BaseNumPyTests, base.BaseMethodsTests):
- @skip_nested
- def test_shift_fill_value(self, data):
- # np.array shape inference. Shift implementation fails.
- super().test_shift_fill_value(data)
- @skip_nested
- def test_fillna_copy_frame(self, data_missing):
- # The "scalar" for this array isn't a scalar.
- super().test_fillna_copy_frame(data_missing)
- @skip_nested
- def test_fillna_copy_series(self, data_missing):
- # The "scalar" for this array isn't a scalar.
- super().test_fillna_copy_series(data_missing)
- @skip_nested
- def test_searchsorted(self, data_for_sorting, as_series):
- # Test setup fails.
- super().test_searchsorted(data_for_sorting, as_series)
- @pytest.mark.xfail(reason="PandasArray.diff may fail on dtype")
- def test_diff(self, data, periods):
- return super().test_diff(data, periods)
- def test_insert(self, data, request):
- if data.dtype.numpy_dtype == object:
- mark = pytest.mark.xfail(reason="Dimension mismatch in np.concatenate")
- request.node.add_marker(mark)
- super().test_insert(data)
- @skip_nested
- def test_insert_invalid(self, data, invalid_scalar):
- # PandasArray[object] can hold anything, so skip
- super().test_insert_invalid(data, invalid_scalar)
- class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests):
- divmod_exc = None
- series_scalar_exc = None
- frame_scalar_exc = None
- series_array_exc = None
- @skip_nested
- def test_divmod(self, data):
- super().test_divmod(data)
- @skip_nested
- def test_divmod_series_array(self, data):
- ser = pd.Series(data)
- self._check_divmod_op(ser, divmod, data, exc=None)
- @skip_nested
- def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
- super().test_arith_series_with_scalar(data, all_arithmetic_operators)
- def test_arith_series_with_array(self, data, all_arithmetic_operators, request):
- opname = all_arithmetic_operators
- if data.dtype.numpy_dtype == object and opname not in ["__add__", "__radd__"]:
- mark = pytest.mark.xfail(reason="Fails for object dtype")
- request.node.add_marker(mark)
- super().test_arith_series_with_array(data, all_arithmetic_operators)
- @skip_nested
- def test_arith_frame_with_scalar(self, data, all_arithmetic_operators):
- super().test_arith_frame_with_scalar(data, all_arithmetic_operators)
- class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
- pass
- class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
- def check_reduce(self, s, op_name, skipna):
- result = getattr(s, op_name)(skipna=skipna)
- # avoid coercing int -> float. Just cast to the actual numpy type.
- expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna)
- tm.assert_almost_equal(result, expected)
- @pytest.mark.parametrize("skipna", [True, False])
- def test_reduce_series(self, data, all_boolean_reductions, skipna):
- super().test_reduce_series(data, all_boolean_reductions, skipna)
- @skip_nested
- class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
- pass
- class TestMissing(BaseNumPyTests, base.BaseMissingTests):
- @skip_nested
- def test_fillna_series(self, data_missing):
- # Non-scalar "scalar" values.
- super().test_fillna_series(data_missing)
- @skip_nested
- def test_fillna_frame(self, data_missing):
- # Non-scalar "scalar" values.
- super().test_fillna_frame(data_missing)
- class TestReshaping(BaseNumPyTests, base.BaseReshapingTests):
- @pytest.mark.parametrize(
- "in_frame",
- [
- True,
- pytest.param(
- False,
- marks=pytest.mark.xfail(reason="PandasArray inconsistently extracted"),
- ),
- ],
- )
- def test_concat(self, data, in_frame):
- super().test_concat(data, in_frame)
- class TestSetitem(BaseNumPyTests, base.BaseSetitemTests):
- @skip_nested
- def test_setitem_invalid(self, data, invalid_scalar):
- # object dtype can hold anything, so doesn't raise
- super().test_setitem_invalid(data, invalid_scalar)
- @skip_nested
- def test_setitem_sequence_broadcasts(self, data, box_in_series):
- # ValueError: cannot set using a list-like indexer with a different
- # length than the value
- super().test_setitem_sequence_broadcasts(data, box_in_series)
- @skip_nested
- @pytest.mark.parametrize("setter", ["loc", None])
- def test_setitem_mask_broadcast(self, data, setter):
- # ValueError: cannot set using a list-like indexer with a different
- # length than the value
- super().test_setitem_mask_broadcast(data, setter)
- @skip_nested
- def test_setitem_scalar_key_sequence_raise(self, data):
- # Failed: DID NOT RAISE <class 'ValueError'>
- super().test_setitem_scalar_key_sequence_raise(data)
- # TODO: there is some issue with PandasArray, therefore,
- # skip the setitem test for now, and fix it later (GH 31446)
- @skip_nested
- @pytest.mark.parametrize(
- "mask",
- [
- np.array([True, True, True, False, False]),
- pd.array([True, True, True, False, False], dtype="boolean"),
- ],
- ids=["numpy-array", "boolean-array"],
- )
- def test_setitem_mask(self, data, mask, box_in_series):
- super().test_setitem_mask(data, mask, box_in_series)
- @skip_nested
- @pytest.mark.parametrize(
- "idx",
- [[0, 1, 2], pd.array([0, 1, 2], dtype="Int64"), np.array([0, 1, 2])],
- ids=["list", "integer-array", "numpy-array"],
- )
- def test_setitem_integer_array(self, data, idx, box_in_series):
- super().test_setitem_integer_array(data, idx, box_in_series)
- @pytest.mark.parametrize(
- "idx, box_in_series",
- [
- ([0, 1, 2, pd.NA], False),
- pytest.param([0, 1, 2, pd.NA], True, marks=pytest.mark.xfail),
- (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
- (pd.array([0, 1, 2, pd.NA], dtype="Int64"), False),
- ],
- ids=["list-False", "list-True", "integer-array-False", "integer-array-True"],
- )
- def test_setitem_integer_with_missing_raises(self, data, idx, box_in_series):
- super().test_setitem_integer_with_missing_raises(data, idx, box_in_series)
- @skip_nested
- def test_setitem_slice(self, data, box_in_series):
- super().test_setitem_slice(data, box_in_series)
- @skip_nested
- def test_setitem_loc_iloc_slice(self, data):
- super().test_setitem_loc_iloc_slice(data)
- def test_setitem_with_expansion_dataframe_column(self, data, full_indexer):
- # https://github.com/pandas-dev/pandas/issues/32395
- df = expected = pd.DataFrame({"data": pd.Series(data)})
- result = pd.DataFrame(index=df.index)
- # because result has object dtype, the attempt to do setting inplace
- # is successful, and object dtype is retained
- key = full_indexer(df)
- result.loc[key, "data"] = df["data"]
- # base class method has expected = df; PandasArray behaves oddly because
- # we patch _typ for these tests.
- if data.dtype.numpy_dtype != object:
- if not isinstance(key, slice) or key != slice(None):
- expected = pd.DataFrame({"data": data.to_numpy()})
- self.assert_frame_equal(result, expected)
- @skip_nested
- class TestParsing(BaseNumPyTests, base.BaseParsingTests):
- pass
- class Test2DCompat(BaseNumPyTests, base.NDArrayBacked2DTests):
- pass
|