123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551 |
- import numpy as np
- import pytest
- from pandas.core.dtypes.common import (
- is_datetime64_dtype,
- is_timedelta64_dtype,
- )
- from pandas.core.dtypes.dtypes import DatetimeTZDtype
- import pandas as pd
- from pandas import (
- CategoricalIndex,
- Series,
- Timedelta,
- Timestamp,
- date_range,
- )
- import pandas._testing as tm
- from pandas.core.arrays import (
- DatetimeArray,
- IntervalArray,
- PandasArray,
- PeriodArray,
- SparseArray,
- TimedeltaArray,
- )
- class TestToIterable:
- # test that we convert an iterable to python types
- dtypes = [
- ("int8", int),
- ("int16", int),
- ("int32", int),
- ("int64", int),
- ("uint8", int),
- ("uint16", int),
- ("uint32", int),
- ("uint64", int),
- ("float16", float),
- ("float32", float),
- ("float64", float),
- ("datetime64[ns]", Timestamp),
- ("datetime64[ns, US/Eastern]", Timestamp),
- ("timedelta64[ns]", Timedelta),
- ]
- @pytest.mark.parametrize("dtype, rdtype", dtypes)
- @pytest.mark.parametrize(
- "method",
- [
- lambda x: x.tolist(),
- lambda x: x.to_list(),
- lambda x: list(x),
- lambda x: list(x.__iter__()),
- ],
- ids=["tolist", "to_list", "list", "iter"],
- )
- def test_iterable(self, index_or_series, method, dtype, rdtype):
- # gh-10904
- # gh-13258
- # coerce iteration to underlying python / pandas types
- typ = index_or_series
- if dtype == "float16" and issubclass(typ, pd.Index):
- with pytest.raises(NotImplementedError, match="float16 indexes are not "):
- typ([1], dtype=dtype)
- return
- s = typ([1], dtype=dtype)
- result = method(s)[0]
- assert isinstance(result, rdtype)
- @pytest.mark.parametrize(
- "dtype, rdtype, obj",
- [
- ("object", object, "a"),
- ("object", int, 1),
- ("category", object, "a"),
- ("category", int, 1),
- ],
- )
- @pytest.mark.parametrize(
- "method",
- [
- lambda x: x.tolist(),
- lambda x: x.to_list(),
- lambda x: list(x),
- lambda x: list(x.__iter__()),
- ],
- ids=["tolist", "to_list", "list", "iter"],
- )
- def test_iterable_object_and_category(
- self, index_or_series, method, dtype, rdtype, obj
- ):
- # gh-10904
- # gh-13258
- # coerce iteration to underlying python / pandas types
- typ = index_or_series
- s = typ([obj], dtype=dtype)
- result = method(s)[0]
- assert isinstance(result, rdtype)
- @pytest.mark.parametrize("dtype, rdtype", dtypes)
- def test_iterable_items(self, dtype, rdtype):
- # gh-13258
- # test if items yields the correct boxed scalars
- # this only applies to series
- s = Series([1], dtype=dtype)
- _, result = list(s.items())[0]
- assert isinstance(result, rdtype)
- _, result = list(s.items())[0]
- assert isinstance(result, rdtype)
- @pytest.mark.parametrize(
- "dtype, rdtype", dtypes + [("object", int), ("category", int)]
- )
- def test_iterable_map(self, index_or_series, dtype, rdtype):
- # gh-13236
- # coerce iteration to underlying python / pandas types
- typ = index_or_series
- if dtype == "float16" and issubclass(typ, pd.Index):
- with pytest.raises(NotImplementedError, match="float16 indexes are not "):
- typ([1], dtype=dtype)
- return
- s = typ([1], dtype=dtype)
- result = s.map(type)[0]
- if not isinstance(rdtype, tuple):
- rdtype = (rdtype,)
- assert result in rdtype
- @pytest.mark.parametrize(
- "method",
- [
- lambda x: x.tolist(),
- lambda x: x.to_list(),
- lambda x: list(x),
- lambda x: list(x.__iter__()),
- ],
- ids=["tolist", "to_list", "list", "iter"],
- )
- def test_categorial_datetimelike(self, method):
- i = CategoricalIndex([Timestamp("1999-12-31"), Timestamp("2000-12-31")])
- result = method(i)[0]
- assert isinstance(result, Timestamp)
- def test_iter_box(self):
- vals = [Timestamp("2011-01-01"), Timestamp("2011-01-02")]
- s = Series(vals)
- assert s.dtype == "datetime64[ns]"
- for res, exp in zip(s, vals):
- assert isinstance(res, Timestamp)
- assert res.tz is None
- assert res == exp
- vals = [
- Timestamp("2011-01-01", tz="US/Eastern"),
- Timestamp("2011-01-02", tz="US/Eastern"),
- ]
- s = Series(vals)
- assert s.dtype == "datetime64[ns, US/Eastern]"
- for res, exp in zip(s, vals):
- assert isinstance(res, Timestamp)
- assert res.tz == exp.tz
- assert res == exp
- # timedelta
- vals = [Timedelta("1 days"), Timedelta("2 days")]
- s = Series(vals)
- assert s.dtype == "timedelta64[ns]"
- for res, exp in zip(s, vals):
- assert isinstance(res, Timedelta)
- assert res == exp
- # period
- vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
- s = Series(vals)
- assert s.dtype == "Period[M]"
- for res, exp in zip(s, vals):
- assert isinstance(res, pd.Period)
- assert res.freq == "M"
- assert res == exp
- @pytest.mark.parametrize(
- "arr, expected_type, dtype",
- [
- (np.array([0, 1], dtype=np.int64), np.ndarray, "int64"),
- (np.array(["a", "b"]), np.ndarray, "object"),
- (pd.Categorical(["a", "b"]), pd.Categorical, "category"),
- (
- pd.DatetimeIndex(["2017", "2018"], tz="US/Central"),
- DatetimeArray,
- "datetime64[ns, US/Central]",
- ),
- (
- pd.PeriodIndex([2018, 2019], freq="A"),
- PeriodArray,
- pd.core.dtypes.dtypes.PeriodDtype("A-DEC"),
- ),
- (pd.IntervalIndex.from_breaks([0, 1, 2]), IntervalArray, "interval"),
- (
- pd.DatetimeIndex(["2017", "2018"]),
- DatetimeArray,
- "datetime64[ns]",
- ),
- (
- pd.TimedeltaIndex([10**10]),
- TimedeltaArray,
- "m8[ns]",
- ),
- ],
- )
- def test_values_consistent(arr, expected_type, dtype):
- l_values = Series(arr)._values
- r_values = pd.Index(arr)._values
- assert type(l_values) is expected_type
- assert type(l_values) is type(r_values)
- tm.assert_equal(l_values, r_values)
- @pytest.mark.parametrize("arr", [np.array([1, 2, 3])])
- def test_numpy_array(arr):
- ser = Series(arr)
- result = ser.array
- expected = PandasArray(arr)
- tm.assert_extension_array_equal(result, expected)
- def test_numpy_array_all_dtypes(any_numpy_dtype):
- ser = Series(dtype=any_numpy_dtype)
- result = ser.array
- if is_datetime64_dtype(any_numpy_dtype):
- assert isinstance(result, DatetimeArray)
- elif is_timedelta64_dtype(any_numpy_dtype):
- assert isinstance(result, TimedeltaArray)
- else:
- assert isinstance(result, PandasArray)
- @pytest.mark.parametrize(
- "arr, attr",
- [
- (pd.Categorical(["a", "b"]), "_codes"),
- (pd.core.arrays.period_array(["2000", "2001"], freq="D"), "_ndarray"),
- (pd.array([0, np.nan], dtype="Int64"), "_data"),
- (IntervalArray.from_breaks([0, 1]), "_left"),
- (SparseArray([0, 1]), "_sparse_values"),
- (DatetimeArray(np.array([1, 2], dtype="datetime64[ns]")), "_ndarray"),
- # tz-aware Datetime
- (
- DatetimeArray(
- np.array(
- ["2000-01-01T12:00:00", "2000-01-02T12:00:00"], dtype="M8[ns]"
- ),
- dtype=DatetimeTZDtype(tz="US/Central"),
- ),
- "_ndarray",
- ),
- ],
- )
- def test_array(arr, attr, index_or_series, request):
- box = index_or_series
- result = box(arr, copy=False).array
- if attr:
- arr = getattr(arr, attr)
- result = getattr(result, attr)
- assert result is arr
- def test_array_multiindex_raises():
- idx = pd.MultiIndex.from_product([["A"], ["a", "b"]])
- msg = "MultiIndex has no single backing array"
- with pytest.raises(ValueError, match=msg):
- idx.array
- @pytest.mark.parametrize(
- "arr, expected",
- [
- (np.array([1, 2], dtype=np.int64), np.array([1, 2], dtype=np.int64)),
- (pd.Categorical(["a", "b"]), np.array(["a", "b"], dtype=object)),
- (
- pd.core.arrays.period_array(["2000", "2001"], freq="D"),
- np.array([pd.Period("2000", freq="D"), pd.Period("2001", freq="D")]),
- ),
- (pd.array([0, np.nan], dtype="Int64"), np.array([0, pd.NA], dtype=object)),
- (
- IntervalArray.from_breaks([0, 1, 2]),
- np.array([pd.Interval(0, 1), pd.Interval(1, 2)], dtype=object),
- ),
- (SparseArray([0, 1]), np.array([0, 1], dtype=np.int64)),
- # tz-naive datetime
- (
- DatetimeArray(np.array(["2000", "2001"], dtype="M8[ns]")),
- np.array(["2000", "2001"], dtype="M8[ns]"),
- ),
- # tz-aware stays tz`-aware
- (
- DatetimeArray(
- np.array(
- ["2000-01-01T06:00:00", "2000-01-02T06:00:00"], dtype="M8[ns]"
- ),
- dtype=DatetimeTZDtype(tz="US/Central"),
- ),
- np.array(
- [
- Timestamp("2000-01-01", tz="US/Central"),
- Timestamp("2000-01-02", tz="US/Central"),
- ]
- ),
- ),
- # Timedelta
- (
- TimedeltaArray(np.array([0, 3600000000000], dtype="i8"), freq="H"),
- np.array([0, 3600000000000], dtype="m8[ns]"),
- ),
- # GH#26406 tz is preserved in Categorical[dt64tz]
- (
- pd.Categorical(date_range("2016-01-01", periods=2, tz="US/Pacific")),
- np.array(
- [
- Timestamp("2016-01-01", tz="US/Pacific"),
- Timestamp("2016-01-02", tz="US/Pacific"),
- ]
- ),
- ),
- ],
- )
- def test_to_numpy(arr, expected, index_or_series_or_array, request):
- box = index_or_series_or_array
- with tm.assert_produces_warning(None):
- thing = box(arr)
- if arr.dtype.name == "int64" and box is pd.array:
- mark = pytest.mark.xfail(reason="thing is Int64 and to_numpy() returns object")
- request.node.add_marker(mark)
- result = thing.to_numpy()
- tm.assert_numpy_array_equal(result, expected)
- result = np.asarray(thing)
- tm.assert_numpy_array_equal(result, expected)
- @pytest.mark.parametrize("as_series", [True, False])
- @pytest.mark.parametrize(
- "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)]
- )
- def test_to_numpy_copy(arr, as_series):
- obj = pd.Index(arr, copy=False)
- if as_series:
- obj = Series(obj.values, copy=False)
- # no copy by default
- result = obj.to_numpy()
- assert np.shares_memory(arr, result) is True
- result = obj.to_numpy(copy=False)
- assert np.shares_memory(arr, result) is True
- # copy=True
- result = obj.to_numpy(copy=True)
- assert np.shares_memory(arr, result) is False
- @pytest.mark.parametrize("as_series", [True, False])
- def test_to_numpy_dtype(as_series):
- tz = "US/Eastern"
- obj = pd.DatetimeIndex(["2000", "2001"], tz=tz)
- if as_series:
- obj = Series(obj)
- # preserve tz by default
- result = obj.to_numpy()
- expected = np.array(
- [Timestamp("2000", tz=tz), Timestamp("2001", tz=tz)], dtype=object
- )
- tm.assert_numpy_array_equal(result, expected)
- result = obj.to_numpy(dtype="object")
- tm.assert_numpy_array_equal(result, expected)
- result = obj.to_numpy(dtype="M8[ns]")
- expected = np.array(["2000-01-01T05", "2001-01-01T05"], dtype="M8[ns]")
- tm.assert_numpy_array_equal(result, expected)
- @pytest.mark.parametrize(
- "values, dtype, na_value, expected",
- [
- ([1, 2, None], "float64", 0, [1.0, 2.0, 0.0]),
- (
- [Timestamp("2000"), Timestamp("2000"), pd.NaT],
- None,
- Timestamp("2000"),
- [np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
- ),
- ],
- )
- def test_to_numpy_na_value_numpy_dtype(
- index_or_series, values, dtype, na_value, expected
- ):
- obj = index_or_series(values)
- result = obj.to_numpy(dtype=dtype, na_value=na_value)
- expected = np.array(expected)
- tm.assert_numpy_array_equal(result, expected)
- @pytest.mark.parametrize(
- "data, multiindex, dtype, na_value, expected",
- [
- (
- [1, 2, None, 4],
- [(0, "a"), (0, "b"), (1, "b"), (1, "c")],
- float,
- None,
- [1.0, 2.0, np.nan, 4.0],
- ),
- (
- [1, 2, None, 4],
- [(0, "a"), (0, "b"), (1, "b"), (1, "c")],
- float,
- np.nan,
- [1.0, 2.0, np.nan, 4.0],
- ),
- (
- [1.0, 2.0, np.nan, 4.0],
- [("a", 0), ("a", 1), ("a", 2), ("b", 0)],
- int,
- 0,
- [1, 2, 0, 4],
- ),
- (
- [Timestamp("2000"), Timestamp("2000"), pd.NaT],
- [(0, Timestamp("2021")), (0, Timestamp("2022")), (1, Timestamp("2000"))],
- None,
- Timestamp("2000"),
- [np.datetime64("2000-01-01T00:00:00.000000000")] * 3,
- ),
- ],
- )
- def test_to_numpy_multiindex_series_na_value(
- data, multiindex, dtype, na_value, expected
- ):
- index = pd.MultiIndex.from_tuples(multiindex)
- series = Series(data, index=index)
- result = series.to_numpy(dtype=dtype, na_value=na_value)
- expected = np.array(expected)
- tm.assert_numpy_array_equal(result, expected)
- def test_to_numpy_kwargs_raises():
- # numpy
- s = Series([1, 2, 3])
- msg = r"to_numpy\(\) got an unexpected keyword argument 'foo'"
- with pytest.raises(TypeError, match=msg):
- s.to_numpy(foo=True)
- # extension
- s = Series([1, 2, 3], dtype="Int64")
- with pytest.raises(TypeError, match=msg):
- s.to_numpy(foo=True)
- @pytest.mark.parametrize(
- "data",
- [
- {"a": [1, 2, 3], "b": [1, 2, None]},
- {"a": np.array([1, 2, 3]), "b": np.array([1, 2, np.nan])},
- {"a": pd.array([1, 2, 3]), "b": pd.array([1, 2, None])},
- ],
- )
- @pytest.mark.parametrize("dtype, na_value", [(float, np.nan), (object, None)])
- def test_to_numpy_dataframe_na_value(data, dtype, na_value):
- # https://github.com/pandas-dev/pandas/issues/33820
- df = pd.DataFrame(data)
- result = df.to_numpy(dtype=dtype, na_value=na_value)
- expected = np.array([[1, 1], [2, 2], [3, na_value]], dtype=dtype)
- tm.assert_numpy_array_equal(result, expected)
- @pytest.mark.parametrize(
- "data, expected",
- [
- (
- {"a": pd.array([1, 2, None])},
- np.array([[1.0], [2.0], [np.nan]], dtype=float),
- ),
- (
- {"a": [1, 2, 3], "b": [1, 2, 3]},
- np.array([[1, 1], [2, 2], [3, 3]], dtype=float),
- ),
- ],
- )
- def test_to_numpy_dataframe_single_block(data, expected):
- # https://github.com/pandas-dev/pandas/issues/33820
- df = pd.DataFrame(data)
- result = df.to_numpy(dtype=float, na_value=np.nan)
- tm.assert_numpy_array_equal(result, expected)
- def test_to_numpy_dataframe_single_block_no_mutate():
- # https://github.com/pandas-dev/pandas/issues/33820
- result = pd.DataFrame(np.array([1.0, 2.0, np.nan]))
- expected = pd.DataFrame(np.array([1.0, 2.0, np.nan]))
- result.to_numpy(na_value=0.0)
- tm.assert_frame_equal(result, expected)
- class TestAsArray:
- @pytest.mark.parametrize("tz", [None, "US/Central"])
- def test_asarray_object_dt64(self, tz):
- ser = Series(date_range("2000", periods=2, tz=tz))
- with tm.assert_produces_warning(None):
- # Future behavior (for tzaware case) with no warning
- result = np.asarray(ser, dtype=object)
- expected = np.array(
- [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
- )
- tm.assert_numpy_array_equal(result, expected)
- def test_asarray_tz_naive(self):
- # This shouldn't produce a warning.
- ser = Series(date_range("2000", periods=2))
- expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
- result = np.asarray(ser)
- tm.assert_numpy_array_equal(result, expected)
- def test_asarray_tz_aware(self):
- tz = "US/Central"
- ser = Series(date_range("2000", periods=2, tz=tz))
- expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
- result = np.asarray(ser, dtype="datetime64[ns]")
- tm.assert_numpy_array_equal(result, expected)
- # Old behavior with no warning
- result = np.asarray(ser, dtype="M8[ns]")
- tm.assert_numpy_array_equal(result, expected)
|