# test_datetime.py
"""
Also test support for datetime64[ns] in Series / DataFrame
"""
from datetime import (
    datetime,
    timedelta,
)
import re

from dateutil.tz import (
    gettz,
    tzutc,
)
import numpy as np
import pytest
import pytz

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
    period_range,
)
import pandas._testing as tm
  26. def test_fancy_getitem():
  27. dti = date_range(
  28. freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
  29. )
  30. s = Series(np.arange(len(dti)), index=dti)
  31. assert s[48] == 48
  32. assert s["1/2/2009"] == 48
  33. assert s["2009-1-2"] == 48
  34. assert s[datetime(2009, 1, 2)] == 48
  35. assert s[Timestamp(datetime(2009, 1, 2))] == 48
  36. with pytest.raises(KeyError, match=r"^'2009-1-3'$"):
  37. s["2009-1-3"]
  38. tm.assert_series_equal(
  39. s["3/6/2009":"2009-06-05"], s[datetime(2009, 3, 6) : datetime(2009, 6, 5)]
  40. )
  41. def test_fancy_setitem():
  42. dti = date_range(
  43. freq="WOM-1FRI", start=datetime(2005, 1, 1), end=datetime(2010, 1, 1)
  44. )
  45. s = Series(np.arange(len(dti)), index=dti)
  46. s[48] = -1
  47. assert s[48] == -1
  48. s["1/2/2009"] = -2
  49. assert s[48] == -2
  50. s["1/2/2009":"2009-06-05"] = -3
  51. assert (s[48:54] == -3).all()
  52. @pytest.mark.parametrize("tz_source", ["pytz", "dateutil"])
  53. def test_getitem_setitem_datetime_tz(tz_source):
  54. if tz_source == "pytz":
  55. tzget = pytz.timezone
  56. else:
  57. # handle special case for utc in dateutil
  58. tzget = lambda x: tzutc() if x == "UTC" else gettz(x)
  59. N = 50
  60. # testing with timezone, GH #2785
  61. rng = date_range("1/1/1990", periods=N, freq="H", tz=tzget("US/Eastern"))
  62. ts = Series(np.random.randn(N), index=rng)
  63. # also test Timestamp tz handling, GH #2789
  64. result = ts.copy()
  65. result["1990-01-01 09:00:00+00:00"] = 0
  66. result["1990-01-01 09:00:00+00:00"] = ts[4]
  67. tm.assert_series_equal(result, ts)
  68. result = ts.copy()
  69. result["1990-01-01 03:00:00-06:00"] = 0
  70. result["1990-01-01 03:00:00-06:00"] = ts[4]
  71. tm.assert_series_equal(result, ts)
  72. # repeat with datetimes
  73. result = ts.copy()
  74. result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = 0
  75. result[datetime(1990, 1, 1, 9, tzinfo=tzget("UTC"))] = ts[4]
  76. tm.assert_series_equal(result, ts)
  77. result = ts.copy()
  78. dt = Timestamp(1990, 1, 1, 3).tz_localize(tzget("US/Central"))
  79. dt = dt.to_pydatetime()
  80. result[dt] = 0
  81. result[dt] = ts[4]
  82. tm.assert_series_equal(result, ts)
  83. def test_getitem_setitem_datetimeindex():
  84. N = 50
  85. # testing with timezone, GH #2785
  86. rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
  87. ts = Series(np.random.randn(N), index=rng)
  88. result = ts["1990-01-01 04:00:00"]
  89. expected = ts[4]
  90. assert result == expected
  91. result = ts.copy()
  92. result["1990-01-01 04:00:00"] = 0
  93. result["1990-01-01 04:00:00"] = ts[4]
  94. tm.assert_series_equal(result, ts)
  95. result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
  96. expected = ts[4:8]
  97. tm.assert_series_equal(result, expected)
  98. result = ts.copy()
  99. result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
  100. result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
  101. tm.assert_series_equal(result, ts)
  102. lb = "1990-01-01 04:00:00"
  103. rb = "1990-01-01 07:00:00"
  104. # GH#18435 strings get a pass from tzawareness compat
  105. result = ts[(ts.index >= lb) & (ts.index <= rb)]
  106. expected = ts[4:8]
  107. tm.assert_series_equal(result, expected)
  108. lb = "1990-01-01 04:00:00-0500"
  109. rb = "1990-01-01 07:00:00-0500"
  110. result = ts[(ts.index >= lb) & (ts.index <= rb)]
  111. expected = ts[4:8]
  112. tm.assert_series_equal(result, expected)
  113. # But we do not give datetimes a pass on tzawareness compat
  114. msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
  115. naive = datetime(1990, 1, 1, 4)
  116. for key in [naive, Timestamp(naive), np.datetime64(naive, "ns")]:
  117. with pytest.raises(KeyError, match=re.escape(repr(key))):
  118. # GH#36148 as of 2.0 we require tzawareness-compat
  119. ts[key]
  120. result = ts.copy()
  121. # GH#36148 as of 2.0 we do not ignore tzawareness mismatch in indexing,
  122. # so setting it as a new key casts to object rather than matching
  123. # rng[4]
  124. result[naive] = ts[4]
  125. assert result.index.dtype == object
  126. tm.assert_index_equal(result.index[:-1], rng.astype(object))
  127. assert result.index[-1] == naive
  128. msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
  129. with pytest.raises(TypeError, match=msg):
  130. # GH#36148 require tzawareness compat as of 2.0
  131. ts[naive : datetime(1990, 1, 1, 7)]
  132. result = ts.copy()
  133. with pytest.raises(TypeError, match=msg):
  134. # GH#36148 require tzawareness compat as of 2.0
  135. result[naive : datetime(1990, 1, 1, 7)] = 0
  136. with pytest.raises(TypeError, match=msg):
  137. # GH#36148 require tzawareness compat as of 2.0
  138. result[naive : datetime(1990, 1, 1, 7)] = 99
  139. # the __setitems__ here failed, so result should still match ts
  140. tm.assert_series_equal(result, ts)
  141. lb = naive
  142. rb = datetime(1990, 1, 1, 7)
  143. msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
  144. with pytest.raises(TypeError, match=msg):
  145. # tznaive vs tzaware comparison is invalid
  146. # see GH#18376, GH#18162
  147. ts[(ts.index >= lb) & (ts.index <= rb)]
  148. lb = Timestamp(naive).tz_localize(rng.tzinfo)
  149. rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
  150. result = ts[(ts.index >= lb) & (ts.index <= rb)]
  151. expected = ts[4:8]
  152. tm.assert_series_equal(result, expected)
  153. result = ts[ts.index[4]]
  154. expected = ts[4]
  155. assert result == expected
  156. result = ts[ts.index[4:8]]
  157. expected = ts[4:8]
  158. tm.assert_series_equal(result, expected)
  159. result = ts.copy()
  160. result[ts.index[4:8]] = 0
  161. result.iloc[4:8] = ts.iloc[4:8]
  162. tm.assert_series_equal(result, ts)
  163. # also test partial date slicing
  164. result = ts["1990-01-02"]
  165. expected = ts[24:48]
  166. tm.assert_series_equal(result, expected)
  167. result = ts.copy()
  168. result["1990-01-02"] = 0
  169. result["1990-01-02"] = ts[24:48]
  170. tm.assert_series_equal(result, ts)
  171. def test_getitem_setitem_periodindex():
  172. N = 50
  173. rng = period_range("1/1/1990", periods=N, freq="H")
  174. ts = Series(np.random.randn(N), index=rng)
  175. result = ts["1990-01-01 04"]
  176. expected = ts[4]
  177. assert result == expected
  178. result = ts.copy()
  179. result["1990-01-01 04"] = 0
  180. result["1990-01-01 04"] = ts[4]
  181. tm.assert_series_equal(result, ts)
  182. result = ts["1990-01-01 04":"1990-01-01 07"]
  183. expected = ts[4:8]
  184. tm.assert_series_equal(result, expected)
  185. result = ts.copy()
  186. result["1990-01-01 04":"1990-01-01 07"] = 0
  187. result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
  188. tm.assert_series_equal(result, ts)
  189. lb = "1990-01-01 04"
  190. rb = "1990-01-01 07"
  191. result = ts[(ts.index >= lb) & (ts.index <= rb)]
  192. expected = ts[4:8]
  193. tm.assert_series_equal(result, expected)
  194. # GH 2782
  195. result = ts[ts.index[4]]
  196. expected = ts[4]
  197. assert result == expected
  198. result = ts[ts.index[4:8]]
  199. expected = ts[4:8]
  200. tm.assert_series_equal(result, expected)
  201. result = ts.copy()
  202. result[ts.index[4:8]] = 0
  203. result.iloc[4:8] = ts.iloc[4:8]
  204. tm.assert_series_equal(result, ts)
  205. def test_datetime_indexing():
  206. index = date_range("1/1/2000", "1/7/2000")
  207. index = index.repeat(3)
  208. s = Series(len(index), index=index)
  209. stamp = Timestamp("1/8/2000")
  210. with pytest.raises(KeyError, match=re.escape(repr(stamp))):
  211. s[stamp]
  212. s[stamp] = 0
  213. assert s[stamp] == 0
  214. # not monotonic
  215. s = Series(len(index), index=index)
  216. s = s[::-1]
  217. with pytest.raises(KeyError, match=re.escape(repr(stamp))):
  218. s[stamp]
  219. s[stamp] = 0
  220. assert s[stamp] == 0
  221. # test duplicates in time series
  222. def test_indexing_with_duplicate_datetimeindex(
  223. rand_series_with_duplicate_datetimeindex,
  224. ):
  225. ts = rand_series_with_duplicate_datetimeindex
  226. uniques = ts.index.unique()
  227. for date in uniques:
  228. result = ts[date]
  229. mask = ts.index == date
  230. total = (ts.index == date).sum()
  231. expected = ts[mask]
  232. if total > 1:
  233. tm.assert_series_equal(result, expected)
  234. else:
  235. tm.assert_almost_equal(result, expected[0])
  236. cp = ts.copy()
  237. cp[date] = 0
  238. expected = Series(np.where(mask, 0, ts), index=ts.index)
  239. tm.assert_series_equal(cp, expected)
  240. key = datetime(2000, 1, 6)
  241. with pytest.raises(KeyError, match=re.escape(repr(key))):
  242. ts[key]
  243. # new index
  244. ts[datetime(2000, 1, 6)] = 0
  245. assert ts[datetime(2000, 1, 6)] == 0
  246. def test_loc_getitem_over_size_cutoff(monkeypatch):
  247. # #1821
  248. monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
  249. # create large list of non periodic datetime
  250. dates = []
  251. sec = timedelta(seconds=1)
  252. half_sec = timedelta(microseconds=500000)
  253. d = datetime(2011, 12, 5, 20, 30)
  254. n = 1100
  255. for i in range(n):
  256. dates.append(d)
  257. dates.append(d + sec)
  258. dates.append(d + sec + half_sec)
  259. dates.append(d + sec + sec + half_sec)
  260. d += 3 * sec
  261. # duplicate some values in the list
  262. duplicate_positions = np.random.randint(0, len(dates) - 1, 20)
  263. for p in duplicate_positions:
  264. dates[p + 1] = dates[p]
  265. df = DataFrame(np.random.randn(len(dates), 4), index=dates, columns=list("ABCD"))
  266. pos = n * 3
  267. timestamp = df.index[pos]
  268. assert timestamp in df.index
  269. # it works!
  270. df.loc[timestamp]
  271. assert len(df.loc[[timestamp]]) > 0
  272. def test_indexing_over_size_cutoff_period_index(monkeypatch):
  273. # GH 27136
  274. monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 1000)
  275. n = 1100
  276. idx = period_range("1/1/2000", freq="T", periods=n)
  277. assert idx._engine.over_size_threshold
  278. s = Series(np.random.randn(len(idx)), index=idx)
  279. pos = n - 1
  280. timestamp = idx[pos]
  281. assert timestamp in s.index
  282. # it works!
  283. s[timestamp]
  284. assert len(s.loc[[timestamp]]) > 0
  285. def test_indexing_unordered():
  286. # GH 2437
  287. rng = date_range(start="2011-01-01", end="2011-01-15")
  288. ts = Series(np.random.rand(len(rng)), index=rng)
  289. ts2 = pd.concat([ts[0:4], ts[-4:], ts[4:-4]])
  290. for t in ts.index:
  291. expected = ts[t]
  292. result = ts2[t]
  293. assert expected == result
  294. # GH 3448 (ranges)
  295. def compare(slobj):
  296. result = ts2[slobj].copy()
  297. result = result.sort_index()
  298. expected = ts[slobj]
  299. expected.index = expected.index._with_freq(None)
  300. tm.assert_series_equal(result, expected)
  301. compare(slice("2011-01-01", "2011-01-15"))
  302. with pytest.raises(KeyError, match="Value based partial slicing on non-monotonic"):
  303. compare(slice("2010-12-30", "2011-01-15"))
  304. compare(slice("2011-01-01", "2011-01-16"))
  305. # partial ranges
  306. compare(slice("2011-01-01", "2011-01-6"))
  307. compare(slice("2011-01-06", "2011-01-8"))
  308. compare(slice("2011-01-06", "2011-01-12"))
  309. # single values
  310. result = ts2["2011"].sort_index()
  311. expected = ts["2011"]
  312. expected.index = expected.index._with_freq(None)
  313. tm.assert_series_equal(result, expected)
  314. def test_indexing_unordered2():
  315. # diff freq
  316. rng = date_range(datetime(2005, 1, 1), periods=20, freq="M")
  317. ts = Series(np.arange(len(rng)), index=rng)
  318. ts = ts.take(np.random.permutation(20))
  319. result = ts["2005"]
  320. for t in result.index:
  321. assert t.year == 2005
  322. def test_indexing():
  323. idx = date_range("2001-1-1", periods=20, freq="M")
  324. ts = Series(np.random.rand(len(idx)), index=idx)
  325. # getting
  326. # GH 3070, make sure semantics work on Series/Frame
  327. expected = ts["2001"]
  328. expected.name = "A"
  329. df = DataFrame({"A": ts})
  330. # GH#36179 pre-2.0 df["2001"] operated as slicing on rows. in 2.0 it behaves
  331. # like any other key, so raises
  332. with pytest.raises(KeyError, match="2001"):
  333. df["2001"]
  334. # setting
  335. ts["2001"] = 1
  336. expected = ts["2001"]
  337. expected.name = "A"
  338. df.loc["2001", "A"] = 1
  339. with pytest.raises(KeyError, match="2001"):
  340. df["2001"]
  341. def test_getitem_str_month_with_datetimeindex():
  342. # GH3546 (not including times on the last day)
  343. idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:00", freq="H")
  344. ts = Series(range(len(idx)), index=idx)
  345. expected = ts["2013-05"]
  346. tm.assert_series_equal(expected, ts)
  347. idx = date_range(start="2013-05-31 00:00", end="2013-05-31 23:59", freq="S")
  348. ts = Series(range(len(idx)), index=idx)
  349. expected = ts["2013-05"]
  350. tm.assert_series_equal(expected, ts)
  351. def test_getitem_str_year_with_datetimeindex():
  352. idx = [
  353. Timestamp("2013-05-31 00:00"),
  354. Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
  355. ]
  356. ts = Series(range(len(idx)), index=idx)
  357. expected = ts["2013"]
  358. tm.assert_series_equal(expected, ts)
  359. def test_getitem_str_second_with_datetimeindex():
  360. # GH14826, indexing with a seconds resolution string / datetime object
  361. df = DataFrame(
  362. np.random.rand(5, 5),
  363. columns=["open", "high", "low", "close", "volume"],
  364. index=date_range("2012-01-02 18:01:00", periods=5, tz="US/Central", freq="s"),
  365. )
  366. # this is a single date, so will raise
  367. with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
  368. df["2012-01-02 18:01:02"]
  369. msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central'\)"
  370. with pytest.raises(KeyError, match=msg):
  371. df[df.index[2]]