123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443 |
- from copy import (
- copy,
- deepcopy,
- )
- import numpy as np
- import pytest
- from pandas.core.dtypes.common import is_scalar
- from pandas import (
- DataFrame,
- Series,
- )
- import pandas._testing as tm
- # ----------------------------------------------------------------------
- # Generic types test cases
- def construct(box, shape, value=None, dtype=None, **kwargs):
- """
- construct an object for the given shape
- if value is specified use that if its a scalar
- if value is an array, repeat it as needed
- """
- if isinstance(shape, int):
- shape = tuple([shape] * box._AXIS_LEN)
- if value is not None:
- if is_scalar(value):
- if value == "empty":
- arr = None
- dtype = np.float64
- # remove the info axis
- kwargs.pop(box._info_axis_name, None)
- else:
- arr = np.empty(shape, dtype=dtype)
- arr.fill(value)
- else:
- fshape = np.prod(shape)
- arr = value.ravel()
- new_shape = fshape / arr.shape[0]
- if fshape % arr.shape[0] != 0:
- raise Exception("invalid value passed in construct")
- arr = np.repeat(arr, new_shape).reshape(shape)
- else:
- arr = np.random.randn(*shape)
- return box(arr, dtype=dtype, **kwargs)
- class TestGeneric:
- @pytest.mark.parametrize(
- "func",
- [
- str.lower,
- {x: x.lower() for x in list("ABCD")},
- Series({x: x.lower() for x in list("ABCD")}),
- ],
- )
- def test_rename(self, frame_or_series, func):
- # single axis
- idx = list("ABCD")
- for axis in frame_or_series._AXIS_ORDERS:
- kwargs = {axis: idx}
- obj = construct(frame_or_series, 4, **kwargs)
- # rename a single axis
- result = obj.rename(**{axis: func})
- expected = obj.copy()
- setattr(expected, axis, list("abcd"))
- tm.assert_equal(result, expected)
- def test_get_numeric_data(self, frame_or_series):
- n = 4
- kwargs = {
- frame_or_series._get_axis_name(i): list(range(n))
- for i in range(frame_or_series._AXIS_LEN)
- }
- # get the numeric data
- o = construct(frame_or_series, n, **kwargs)
- result = o._get_numeric_data()
- tm.assert_equal(result, o)
- # non-inclusion
- result = o._get_bool_data()
- expected = construct(frame_or_series, n, value="empty", **kwargs)
- if isinstance(o, DataFrame):
- # preserve columns dtype
- expected.columns = o.columns[:0]
- # https://github.com/pandas-dev/pandas/issues/50862
- tm.assert_equal(result.reset_index(drop=True), expected)
- # get the bool data
- arr = np.array([True, True, False, True])
- o = construct(frame_or_series, n, value=arr, **kwargs)
- result = o._get_numeric_data()
- tm.assert_equal(result, o)
- def test_nonzero(self, frame_or_series):
- # GH 4633
- # look at the boolean/nonzero behavior for objects
- obj = construct(frame_or_series, shape=4)
- msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
- with pytest.raises(ValueError, match=msg):
- bool(obj == 0)
- with pytest.raises(ValueError, match=msg):
- bool(obj == 1)
- with pytest.raises(ValueError, match=msg):
- bool(obj)
- obj = construct(frame_or_series, shape=4, value=1)
- with pytest.raises(ValueError, match=msg):
- bool(obj == 0)
- with pytest.raises(ValueError, match=msg):
- bool(obj == 1)
- with pytest.raises(ValueError, match=msg):
- bool(obj)
- obj = construct(frame_or_series, shape=4, value=np.nan)
- with pytest.raises(ValueError, match=msg):
- bool(obj == 0)
- with pytest.raises(ValueError, match=msg):
- bool(obj == 1)
- with pytest.raises(ValueError, match=msg):
- bool(obj)
- # empty
- obj = construct(frame_or_series, shape=0)
- with pytest.raises(ValueError, match=msg):
- bool(obj)
- # invalid behaviors
- obj1 = construct(frame_or_series, shape=4, value=1)
- obj2 = construct(frame_or_series, shape=4, value=1)
- with pytest.raises(ValueError, match=msg):
- if obj1:
- pass
- with pytest.raises(ValueError, match=msg):
- obj1 and obj2
- with pytest.raises(ValueError, match=msg):
- obj1 or obj2
- with pytest.raises(ValueError, match=msg):
- not obj1
- def test_frame_or_series_compound_dtypes(self, frame_or_series):
- # see gh-5191
- # Compound dtypes should raise NotImplementedError.
- def f(dtype):
- return construct(frame_or_series, shape=3, value=1, dtype=dtype)
- msg = (
- "compound dtypes are not implemented "
- f"in the {frame_or_series.__name__} constructor"
- )
- with pytest.raises(NotImplementedError, match=msg):
- f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
- # these work (though results may be unexpected)
- f("int64")
- f("float64")
- f("M8[ns]")
- def test_metadata_propagation(self, frame_or_series):
- # check that the metadata matches up on the resulting ops
- o = construct(frame_or_series, shape=3)
- o.name = "foo"
- o2 = construct(frame_or_series, shape=3)
- o2.name = "bar"
- # ----------
- # preserving
- # ----------
- # simple ops with scalars
- for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
- result = getattr(o, op)(1)
- tm.assert_metadata_equivalent(o, result)
- # ops with like
- for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
- result = getattr(o, op)(o)
- tm.assert_metadata_equivalent(o, result)
- # simple boolean
- for op in ["__eq__", "__le__", "__ge__"]:
- v1 = getattr(o, op)(o)
- tm.assert_metadata_equivalent(o, v1)
- tm.assert_metadata_equivalent(o, v1 & v1)
- tm.assert_metadata_equivalent(o, v1 | v1)
- # combine_first
- result = o.combine_first(o2)
- tm.assert_metadata_equivalent(o, result)
- # ---------------------------
- # non-preserving (by default)
- # ---------------------------
- # add non-like
- result = o + o2
- tm.assert_metadata_equivalent(result)
- # simple boolean
- for op in ["__eq__", "__le__", "__ge__"]:
- # this is a name matching op
- v1 = getattr(o, op)(o)
- v2 = getattr(o, op)(o2)
- tm.assert_metadata_equivalent(v2)
- tm.assert_metadata_equivalent(v1 & v2)
- tm.assert_metadata_equivalent(v1 | v2)
- def test_size_compat(self, frame_or_series):
- # GH8846
- # size property should be defined
- o = construct(frame_or_series, shape=10)
- assert o.size == np.prod(o.shape)
- assert o.size == 10 ** len(o.axes)
- def test_split_compat(self, frame_or_series):
- # xref GH8846
- o = construct(frame_or_series, shape=10)
- assert len(np.array_split(o, 5)) == 5
- assert len(np.array_split(o, 2)) == 2
- # See gh-12301
- def test_stat_unexpected_keyword(self, frame_or_series):
- obj = construct(frame_or_series, 5)
- starwars = "Star Wars"
- errmsg = "unexpected keyword"
- with pytest.raises(TypeError, match=errmsg):
- obj.max(epic=starwars) # stat_function
- with pytest.raises(TypeError, match=errmsg):
- obj.var(epic=starwars) # stat_function_ddof
- with pytest.raises(TypeError, match=errmsg):
- obj.sum(epic=starwars) # cum_function
- with pytest.raises(TypeError, match=errmsg):
- obj.any(epic=starwars) # logical_function
- @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
- def test_api_compat(self, func, frame_or_series):
- # GH 12021
- # compat for __name__, __qualname__
- obj = construct(frame_or_series, 5)
- f = getattr(obj, func)
- assert f.__name__ == func
- assert f.__qualname__.endswith(func)
- def test_stat_non_defaults_args(self, frame_or_series):
- obj = construct(frame_or_series, 5)
- out = np.array([0])
- errmsg = "the 'out' parameter is not supported"
- with pytest.raises(ValueError, match=errmsg):
- obj.max(out=out) # stat_function
- with pytest.raises(ValueError, match=errmsg):
- obj.var(out=out) # stat_function_ddof
- with pytest.raises(ValueError, match=errmsg):
- obj.sum(out=out) # cum_function
- with pytest.raises(ValueError, match=errmsg):
- obj.any(out=out) # logical_function
- def test_truncate_out_of_bounds(self, frame_or_series):
- # GH11382
- # small
- shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
- small = construct(frame_or_series, shape, dtype="int8", value=1)
- tm.assert_equal(small.truncate(), small)
- tm.assert_equal(small.truncate(before=0, after=3e3), small)
- tm.assert_equal(small.truncate(before=-1, after=2e3), small)
- # big
- shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
- big = construct(frame_or_series, shape, dtype="int8", value=1)
- tm.assert_equal(big.truncate(), big)
- tm.assert_equal(big.truncate(before=0, after=3e6), big)
- tm.assert_equal(big.truncate(before=-1, after=2e6), big)
- @pytest.mark.parametrize(
- "func",
- [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
- )
- @pytest.mark.parametrize("shape", [0, 1, 2])
- def test_copy_and_deepcopy(self, frame_or_series, shape, func):
- # GH 15444
- obj = construct(frame_or_series, shape)
- obj_copy = func(obj)
- assert obj_copy is not obj
- tm.assert_equal(obj_copy, obj)
- class TestNDFrame:
- # tests that don't fit elsewhere
- @pytest.mark.parametrize(
- "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
- )
- def test_squeeze_series_noop(self, ser):
- # noop
- tm.assert_series_equal(ser.squeeze(), ser)
- def test_squeeze_frame_noop(self):
- # noop
- df = tm.makeTimeDataFrame()
- tm.assert_frame_equal(df.squeeze(), df)
- def test_squeeze_frame_reindex(self):
- # squeezing
- df = tm.makeTimeDataFrame().reindex(columns=["A"])
- tm.assert_series_equal(df.squeeze(), df["A"])
- def test_squeeze_0_len_dim(self):
- # don't fail with 0 length dimensions GH11229 & GH8999
- empty_series = Series([], name="five", dtype=np.float64)
- empty_frame = DataFrame([empty_series])
- tm.assert_series_equal(empty_series, empty_series.squeeze())
- tm.assert_series_equal(empty_series, empty_frame.squeeze())
- def test_squeeze_axis(self):
- # axis argument
- df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
- assert df.shape == (1, 1)
- tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
- tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
- tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
- tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
- assert df.squeeze() == df.iloc[0, 0]
- msg = "No axis named 2 for object type DataFrame"
- with pytest.raises(ValueError, match=msg):
- df.squeeze(axis=2)
- msg = "No axis named x for object type DataFrame"
- with pytest.raises(ValueError, match=msg):
- df.squeeze(axis="x")
- def test_squeeze_axis_len_3(self):
- df = tm.makeTimeDataFrame(3)
- tm.assert_frame_equal(df.squeeze(axis=0), df)
- def test_numpy_squeeze(self):
- s = tm.makeFloatSeries()
- tm.assert_series_equal(np.squeeze(s), s)
- df = tm.makeTimeDataFrame().reindex(columns=["A"])
- tm.assert_series_equal(np.squeeze(df), df["A"])
- @pytest.mark.parametrize(
- "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
- )
- def test_transpose_series(self, ser):
- # calls implementation in pandas/core/base.py
- tm.assert_series_equal(ser.transpose(), ser)
- def test_transpose_frame(self):
- df = tm.makeTimeDataFrame()
- tm.assert_frame_equal(df.transpose().transpose(), df)
- def test_numpy_transpose(self, frame_or_series):
- obj = tm.makeTimeDataFrame()
- obj = tm.get_obj(obj, frame_or_series)
- if frame_or_series is Series:
- # 1D -> np.transpose is no-op
- tm.assert_series_equal(np.transpose(obj), obj)
- # round-trip preserved
- tm.assert_equal(np.transpose(np.transpose(obj)), obj)
- msg = "the 'axes' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- np.transpose(obj, axes=1)
- @pytest.mark.parametrize(
- "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
- )
- def test_take_series(self, ser):
- indices = [1, 5, -2, 6, 3, -1]
- out = ser.take(indices)
- expected = Series(
- data=ser.values.take(indices),
- index=ser.index.take(indices),
- dtype=ser.dtype,
- )
- tm.assert_series_equal(out, expected)
- def test_take_frame(self):
- indices = [1, 5, -2, 6, 3, -1]
- df = tm.makeTimeDataFrame()
- out = df.take(indices)
- expected = DataFrame(
- data=df.values.take(indices, axis=0),
- index=df.index.take(indices),
- columns=df.columns,
- )
- tm.assert_frame_equal(out, expected)
- def test_take_invalid_kwargs(self, frame_or_series):
- indices = [-3, 2, 0, 1]
- obj = tm.makeTimeDataFrame()
- obj = tm.get_obj(obj, frame_or_series)
- msg = r"take\(\) got an unexpected keyword argument 'foo'"
- with pytest.raises(TypeError, match=msg):
- obj.take(indices, foo=2)
- msg = "the 'out' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- obj.take(indices, out=indices)
- msg = "the 'mode' parameter is not supported"
- with pytest.raises(ValueError, match=msg):
- obj.take(indices, mode="clip")
- def test_axis_classmethods(self, frame_or_series):
- box = frame_or_series
- obj = box(dtype=object)
- values = box._AXIS_TO_AXIS_NUMBER.keys()
- for v in values:
- assert obj._get_axis_number(v) == box._get_axis_number(v)
- assert obj._get_axis_name(v) == box._get_axis_name(v)
- assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)
- def test_flags_identity(self, frame_or_series):
- obj = Series([1, 2])
- if frame_or_series is DataFrame:
- obj = obj.to_frame()
- assert obj.flags is obj.flags
- obj2 = obj.copy()
- assert obj2.flags is not obj.flags
|