123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- """
- Also test support for datetime64[ns] in Series / DataFrame
- """
- from datetime import (
- datetime,
- timedelta,
- )
- import re
- from dateutil.tz import (
- gettz,
- tzutc,
- )
- import numpy as np
- import pytest
- import pytz
- from pandas._libs import index as libindex
- import pandas as pd
- from pandas import (
- DataFrame,
- Series,
- Timestamp,
- date_range,
- period_range,
- )
- import pandas._testing as tm
- def test_fancy_getitem():
- dti = date_range(
- freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
- )
- s = Series(np.arange(len(dti)), index=dti)
- assert s[48] == 48
- assert s["1/2/2009"] == 48
- assert s["2009-1-2"] == 48
- assert s[datetime(2009, 1, 2)] == 48
- assert s[Timestamp(datetime(2009, 1, 2))] == 48
- with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
- s["2009-1-3"]
- tm.assert_series_equal(
- s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
- )
- def test_fancy_setitem():
- dti = date_range(
- freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
- )
- s = Series(np.arange(len(dti)), index=dti)
- s[48] = -1
- assert s[48] == -1
- s["1/2/2009"] = -2
- assert s[48] == -2
- s["1/2/2009":"2009-06-05"] = -3
- assert (s[48:54] == -3).all()
- @pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
- def test_getitem_setitem_datetime_tz(tz_source):
- if tz_source == "pytz":
- tzget = pytz.timezone
- else:
- # handle special case for utc in dateutil
- tzget = lambda x: tzutc() if x == "UTC" else gettz(x)
- N = 50
- # testing with timezone, GH #2785
- rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern"))
- ts = Series(np.random.randn(N), index=rng)
- # also test Timestamp tz handling, GH #2789
- result = ts.copy()
- result["1990-01-01 09:00:00+00:00"] = 0
- result["1990-01-01 09:00:00+00:00"] = ts[4]
- tm.assert_series_equal(result, ts)
- result = ts.copy()
- result["1990-01-01 03:00:00-06:00"] = 0
- result["1990-01-01 03:00:00-06:00"] = ts[4]
- tm.assert_series_equal(result, ts)
- # repeat with datetimes
- result = ts.copy()
- result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
- result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts[4]
- tm.assert_series_equal(result, ts)
- result = ts.copy()
- dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
- dt = dt.to_pydatetime()
- result[dt] = 0
- result[dt] = ts[4]
- tm.assert_series_equal(result, ts)
- def test_getitem_setitem_datetimeindex():
- N = 50
- # testing with timezone, GH #2785
- rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
- ts = Series(np.random.randn(N), index=rng)
- result = ts["1990-01-01 04:00:00"]
- expected = ts[4]
- assert result == expected
- result = ts.copy()
- result["1990-01-01 04:00:00"] = 0
- result["1990-01-01 04:00:00"] = ts[4]
- tm.assert_series_equal(result, ts)
- result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- result = ts.copy()
- result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
- result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
- tm.assert_series_equal(result, ts)
- lb = "1990-01-01 04:00:00"
- rb = "1990-01-01 07:00:00"
- # GH#18435 strings get a pass from tzawareness compat
- result = ts[(ts.index >= lb) & (ts.index <= rb)]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- lb = "1990-01-01 04:00:00-0500"
- rb = "1990-01-01 07:00:00-0500"
- result = ts[(ts.index >= lb) & (ts.index <= rb)]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- # But we do not give datetimes a pass on tzawareness compat
- msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
- naive = datetime(1990, 1, 1, 4)
- for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
- with pytest.raises(KeyError, match=re.escape(repr(key))):
- # GH#36148 as of 2.0 we require tzawareness-compat
- ts[key]
- result = ts.copy()
- # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
- # so setting it as a new key casts to object rather than matching
- # rng[4]
- result[naive] = ts[4]
- assert result.index.dtype == object
- tm.assert_index_equal(result.index[:-1], rng.astype(object))
- assert result.index[-1] == naive
- msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
- with pytest.raises(TypeError, match=msg):
- # GH#36148 require tzawareness compat as of 2.0
- ts[naive : datetime(1990, 1, 1, 7)]
- result = ts.copy()
- with pytest.raises(TypeError, match=msg):
- # GH#36148 require tzawareness compat as of 2.0
- result[naive : datetime(1990, 1, 1, 7)] = 0
- with pytest.raises(TypeError, match=msg):
- # GH#36148 require tzawareness compat as of 2.0
- result[naive : datetime(1990, 1, 1, 7)] = 99
- # the __setitems__ here failed, so result should still match ts
- tm.assert_series_equal(result, ts)
- lb = naive
- rb = datetime(1990, 1, 1, 7)
- msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
- with pytest.raises(TypeError, match=msg):
- # tznaive vs tzaware comparison is invalid
- # see GH#18376, GH#18162
- ts[(ts.index >= lb) & (ts.index <= rb)]
- lb = Timestamp(naive).tz_localize(rng.tzinfo)
- rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
- result = ts[(ts.index >= lb) & (ts.index <= rb)]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- result = ts[ts.index[4]]
- expected = ts[4]
- assert result == expected
- result = ts[ts.index[4:8]]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- result = ts.copy()
- result[ts.index[4:8]] = 0
- result.iloc[4:8] = ts.iloc[4:8]
- tm.assert_series_equal(result, ts)
- # also test partial date slicing
- result = ts["1990-01-02"]
- expected = ts[24:48]
- tm.assert_series_equal(result, expected)
- result = ts.copy()
- result["1990-01-02"] = 0
- result["1990-01-02"] = ts[24:48]
- tm.assert_series_equal(result, ts)
- def test_getitem_setitem_periodindex():
- N = 50
- rng = period_range("1/1/1990", periods=N, freq="H")
- ts = Series(np.random.randn(N), index=rng)
- result = ts["1990-01-01 04"]
- expected = ts[4]
- assert result == expected
- result = ts.copy()
- result["1990-01-01 04"] = 0
- result["1990-01-01 04"] = ts[4]
- tm.assert_series_equal(result, ts)
- result = ts["1990-01-01 04":"1990-01-01 07"]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- result = ts.copy()
- result["1990-01-01 04":"1990-01-01 07"] = 0
- result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
- tm.assert_series_equal(result, ts)
- lb = "1990-01-01 04"
- rb = "1990-01-01 07"
- result = ts[(ts.index >= lb) & (ts.index <= rb)]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- # GH 2782
- result = ts[ts.index[4]]
- expected = ts[4]
- assert result == expected
- result = ts[ts.index[4:8]]
- expected = ts[4:8]
- tm.assert_series_equal(result, expected)
- result = ts.copy()
- result[ts.index[4:8]] = 0
- result.iloc[4:8] = ts.iloc[4:8]
- tm.assert_series_equal(result, ts)
- def test_datetime_indexing():
- index = date_range("1/1/2000", "1/7/2000")
- index = index.repeat(3)
- s = Series(len(index), index=index)
- stamp = Timestamp("1/8/2000")
- with pytest.raises(KeyError, match=re.escape(repr(stamp))):
- s[stamp]
- s[stamp] = 0
- assert s[stamp] == 0
- # not monotonic
- s = Series(len(index), index=index)
- s = s[::-1]
- with pytest.raises(KeyError, match=re.escape(repr(stamp))):
- s[stamp]
- s[stamp] = 0
- assert s[stamp] == 0
- # test duplicates in time series
- def test_indexing_with_duplicate_datetimeindex(
- rand_series_with_duplicate_datetimeindex,
- ):
- ts = rand_series_with_duplicate_datetimeindex
- uniques = ts.index.unique()
- for date in uniques:
- result = ts[date]
- mask = ts.index == date
- total = (ts.index == date).sum()
- expected = ts[mask]
- if total > 1:
- tm.assert_series_equal(result, expected)
- else:
- tm.assert_almost_equal(result, expected[0])
- cp = ts.copy()
- cp[date] = 0
- expected = Series(np.where(mask, 0, ts), index=ts.index)
- tm.assert_series_equal(cp, expected)
- key = datetime(2000, 1, 6)
- with pytest.raises(KeyError, match=re.escape(repr(key))):
- ts[key]
- # new index
- ts[datetime(2000, 1, 6)] = 0
- assert ts[datetime(2000, 1, 6)] == 0
- def test_loc_getitem_over_size_cutoff(monkeypatch):
- # #1821
- monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
- # create large list of non periodic datetime
- dates = []
- sec = timedelta(seconds=1)
- half_sec = timedelta(microseconds=500000)
- d = datetime(2011, 12, 5, 20, 30)
- n = 1100
- for i in range(n):
- dates.append(d)
- dates.append(d + sec)
- dates.append(d + sec + half_sec)
- dates.append(d + sec + sec + half_sec)
- d += 3 * sec
- # duplicate some values in the list
- duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
- for p in duplicate_positions:
- dates[p + 1] = dates[p]
- df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD"))
- pos = n * 3
- timestamp = df.index[pos]
- assert timestamp in df.index
- # it works!
- df.loc[timestamp]
- assert len(df.loc[[timestamp]]) > 0
- def test_indexing_over_size_cutoff_period_index(monkeypatch):
- # GH 27136
- monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
- n = 1100
- idx = period_range("1/1/2000", freq="T", periods=n)
- assert idx._engine.over_size_threshold
- s = Series(np.random.randn(len(idx)), index=idx)
- pos = n - 1
- timestamp = idx[pos]
- assert timestamp in s.index
- # it works!
- s[timestamp]
- assert len(s.loc[[timestamp]]) > 0
- def test_indexing_unordered():
- # GH 2437
- rng = date_range(start="2011-01-01", end="2011-01-15")
- ts = Series(np.random.rand(len(rng)), index=rng)
- ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
- for t in ts.index:
- expected = ts[t]
- result = ts2[t]
- assert expected == result
- # GH 3448 (ranges)
- def compare(slobj):
- result = ts2[slobj].copy()
- result = result.sort_index()
- expected = ts[slobj]
- expected.index = expected.index._with_freq(None)
- tm.assert_series_equal(result, expected)
- compare(slice("2011-01-01", "2011-01-15"))
- with pytest.raises(KeyError, match="Value based partial slicing on non-monotonic"):
- compare(slice("2010-12-30", "2011-01-15"))
- compare(slice("2011-01-01", "2011-01-16"))
- # partial ranges
- compare(slice("2011-01-01", "2011-01-6"))
- compare(slice("2011-01-06", "2011-01-8"))
- compare(slice("2011-01-06", "2011-01-12"))
- # single values
- result = ts2["2011"].sort_index()
- expected = ts["2011"]
- expected.index = expected.index._with_freq(None)
- tm.assert_series_equal(result, expected)
- def test_indexing_unordered2():
- # diff freq
- rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
- ts = Series(np.arange(len(rng)), index=rng)
- ts = ts.take(np.random.permutation(20))
- result = ts["2005"]
- for t in result.index:
- assert t.year == 2005
- def test_indexing():
- idx = date_range("2001-1-1", periods=20, freq="M")
- ts = Series(np.random.rand(len(idx)), index=idx)
- # getting
- # GH 3070, make sure semantics work on Series/Frame
- expected = ts["2001"]
- expected.name = "A"
- df = DataFrame({"A": ts})
- # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
- # like any other key, so raises
- with pytest.raises(KeyError, match="2001"):
- df["2001"]
- # setting
- ts["2001"] = 1
- expected = ts["2001"]
- expected.name = "A"
- df.loc["2001", "A"] = 1
- with pytest.raises(KeyError, match="2001"):
- df["2001"]
- def test_getitem_str_month_with_datetimeindex():
- # GH3546 (not including times on the last day)
- idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
- ts = Series(range(len(idx)), index=idx)
- expected = ts["2013-05"]
- tm.assert_series_equal(expected, ts)
- idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S")
- ts = Series(range(len(idx)), index=idx)
- expected = ts["2013-05"]
- tm.assert_series_equal(expected, ts)
- def test_getitem_str_year_with_datetimeindex():
- idx = [
- Timestamp("2013-05-31 00:00"),
- Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
- ]
- ts = Series(range(len(idx)), index=idx)
- expected = ts["2013"]
- tm.assert_series_equal(expected, ts)
- def test_getitem_str_second_with_datetimeindex():
- # GH14826, indexing with a seconds resolution string / datetime object
- df = DataFrame(
- np.random.rand(5, 5),
- columns=["open", "high", "low", "close", "volume"],
- index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
- )
- # this is a single date, so will raise
- with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
- df["2012-01-02 18:01:02"]
- msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
- with pytest.raises(KeyError, match=msg):
- df[df.index[2]]
|