test_datetimes.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. import datetime as dt
  2. from datetime import datetime
  3. import dateutil
  4. import numpy as np
  5. import pytest
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. DatetimeIndex,
  10. Index,
  11. MultiIndex,
  12. Series,
  13. Timestamp,
  14. concat,
  15. date_range,
  16. to_timedelta,
  17. )
  18. import pandas._testing as tm
  19. class TestDatetimeConcat:
  20. def test_concat_datetime64_block(self):
  21. rng = date_range("1/1/2000", periods=10)
  22. df = DataFrame({"time": rng})
  23. result = concat([df, df])
  24. assert (result.iloc[:10]["time"] == rng).all()
  25. assert (result.iloc[10:]["time"] == rng).all()
  26. def test_concat_datetime_datetime64_frame(self):
  27. # GH#2624
  28. rows = []
  29. rows.append([datetime(2010, 1, 1), 1])
  30. rows.append([datetime(2010, 1, 2), "hi"])
  31. df2_obj = DataFrame.from_records(rows, columns=["date", "test"])
  32. ind = date_range(start="2000/1/1", freq="D", periods=10)
  33. df1 = DataFrame({"date": ind, "test": range(10)})
  34. # it works!
  35. concat([df1, df2_obj])
  36. def test_concat_datetime_timezone(self):
  37. # GH 18523
  38. idx1 = date_range("2011-01-01", periods=3, freq="H", tz="Europe/Paris")
  39. idx2 = date_range(start=idx1[0], end=idx1[-1], freq="H")
  40. df1 = DataFrame({"a": [1, 2, 3]}, index=idx1)
  41. df2 = DataFrame({"b": [1, 2, 3]}, index=idx2)
  42. result = concat([df1, df2], axis=1)
  43. exp_idx = (
  44. DatetimeIndex(
  45. [
  46. "2011-01-01 00:00:00+01:00",
  47. "2011-01-01 01:00:00+01:00",
  48. "2011-01-01 02:00:00+01:00",
  49. ],
  50. freq="H",
  51. )
  52. .tz_convert("UTC")
  53. .tz_convert("Europe/Paris")
  54. )
  55. expected = DataFrame(
  56. [[1, 1], [2, 2], [3, 3]], index=exp_idx, columns=["a", "b"]
  57. )
  58. tm.assert_frame_equal(result, expected)
  59. idx3 = date_range("2011-01-01", periods=3, freq="H", tz="Asia/Tokyo")
  60. df3 = DataFrame({"b": [1, 2, 3]}, index=idx3)
  61. result = concat([df1, df3], axis=1)
  62. exp_idx = DatetimeIndex(
  63. [
  64. "2010-12-31 15:00:00+00:00",
  65. "2010-12-31 16:00:00+00:00",
  66. "2010-12-31 17:00:00+00:00",
  67. "2010-12-31 23:00:00+00:00",
  68. "2011-01-01 00:00:00+00:00",
  69. "2011-01-01 01:00:00+00:00",
  70. ]
  71. )
  72. expected = DataFrame(
  73. [
  74. [np.nan, 1],
  75. [np.nan, 2],
  76. [np.nan, 3],
  77. [1, np.nan],
  78. [2, np.nan],
  79. [3, np.nan],
  80. ],
  81. index=exp_idx,
  82. columns=["a", "b"],
  83. )
  84. tm.assert_frame_equal(result, expected)
  85. # GH 13783: Concat after resample
  86. result = concat([df1.resample("H").mean(), df2.resample("H").mean()], sort=True)
  87. expected = DataFrame(
  88. {"a": [1, 2, 3] + [np.nan] * 3, "b": [np.nan] * 3 + [1, 2, 3]},
  89. index=idx1.append(idx1),
  90. )
  91. tm.assert_frame_equal(result, expected)
  92. def test_concat_datetimeindex_freq(self):
  93. # GH 3232
  94. # Monotonic index result
  95. dr = date_range("01-Jan-2013", periods=100, freq="50L", tz="UTC")
  96. data = list(range(100))
  97. expected = DataFrame(data, index=dr)
  98. result = concat([expected[:50], expected[50:]])
  99. tm.assert_frame_equal(result, expected)
  100. # Non-monotonic index result
  101. result = concat([expected[50:], expected[:50]])
  102. expected = DataFrame(data[50:] + data[:50], index=dr[50:].append(dr[:50]))
  103. expected.index._data.freq = None
  104. tm.assert_frame_equal(result, expected)
  105. def test_concat_multiindex_datetime_object_index(self):
  106. # https://github.com/pandas-dev/pandas/issues/11058
  107. idx = Index(
  108. [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)],
  109. dtype="object",
  110. )
  111. s = Series(
  112. ["a", "b"],
  113. index=MultiIndex.from_arrays(
  114. [
  115. [1, 2],
  116. idx[:-1],
  117. ],
  118. names=["first", "second"],
  119. ),
  120. )
  121. s2 = Series(
  122. ["a", "b"],
  123. index=MultiIndex.from_arrays(
  124. [[1, 2], idx[::2]],
  125. names=["first", "second"],
  126. ),
  127. )
  128. mi = MultiIndex.from_arrays(
  129. [[1, 2, 2], idx],
  130. names=["first", "second"],
  131. )
  132. assert mi.levels[1].dtype == object
  133. expected = DataFrame(
  134. [["a", "a"], ["b", np.nan], [np.nan, "b"]],
  135. index=mi,
  136. )
  137. result = concat([s, s2], axis=1)
  138. tm.assert_frame_equal(result, expected)
  139. def test_concat_NaT_series(self):
  140. # GH 11693
  141. # test for merging NaT series with datetime series.
  142. x = Series(
  143. date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="US/Eastern")
  144. )
  145. y = Series(pd.NaT, index=[0, 1], dtype="datetime64[ns, US/Eastern]")
  146. expected = Series([x[0], x[1], pd.NaT, pd.NaT])
  147. result = concat([x, y], ignore_index=True)
  148. tm.assert_series_equal(result, expected)
  149. # all NaT with tz
  150. expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns, US/Eastern]")
  151. result = concat([y, y], ignore_index=True)
  152. tm.assert_series_equal(result, expected)
  153. # without tz
  154. x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h"))
  155. y = Series(date_range("20151124 10:00", "20151124 11:00", freq="1h"))
  156. y[:] = pd.NaT
  157. expected = Series([x[0], x[1], pd.NaT, pd.NaT])
  158. result = concat([x, y], ignore_index=True)
  159. tm.assert_series_equal(result, expected)
  160. # all NaT without tz
  161. x[:] = pd.NaT
  162. expected = Series(pd.NaT, index=range(4), dtype="datetime64[ns]")
  163. result = concat([x, y], ignore_index=True)
  164. tm.assert_series_equal(result, expected)
  165. @pytest.mark.parametrize("tz", [None, "UTC"])
  166. def test_concat_NaT_dataframes(self, tz):
  167. # GH 12396
  168. first = DataFrame([[pd.NaT], [pd.NaT]])
  169. first = first.apply(lambda x: x.dt.tz_localize(tz))
  170. second = DataFrame(
  171. [[Timestamp("2015/01/01", tz=tz)], [Timestamp("2016/01/01", tz=tz)]],
  172. index=[2, 3],
  173. )
  174. expected = DataFrame(
  175. [
  176. pd.NaT,
  177. pd.NaT,
  178. Timestamp("2015/01/01", tz=tz),
  179. Timestamp("2016/01/01", tz=tz),
  180. ]
  181. )
  182. result = concat([first, second], axis=0)
  183. tm.assert_frame_equal(result, expected)
  184. @pytest.mark.parametrize("tz1", [None, "UTC"])
  185. @pytest.mark.parametrize("tz2", [None, "UTC"])
  186. @pytest.mark.parametrize("s", [pd.NaT, Timestamp("20150101")])
  187. def test_concat_NaT_dataframes_all_NaT_axis_0(self, tz1, tz2, s):
  188. # GH 12396
  189. # tz-naive
  190. first = DataFrame([[pd.NaT], [pd.NaT]]).apply(lambda x: x.dt.tz_localize(tz1))
  191. second = DataFrame([s]).apply(lambda x: x.dt.tz_localize(tz2))
  192. result = concat([first, second], axis=0)
  193. expected = DataFrame(Series([pd.NaT, pd.NaT, s], index=[0, 1, 0]))
  194. expected = expected.apply(lambda x: x.dt.tz_localize(tz2))
  195. if tz1 != tz2:
  196. expected = expected.astype(object)
  197. tm.assert_frame_equal(result, expected)
  198. @pytest.mark.parametrize("tz1", [None, "UTC"])
  199. @pytest.mark.parametrize("tz2", [None, "UTC"])
  200. def test_concat_NaT_dataframes_all_NaT_axis_1(self, tz1, tz2):
  201. # GH 12396
  202. first = DataFrame(Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1))
  203. second = DataFrame(Series([pd.NaT]).dt.tz_localize(tz2), columns=[1])
  204. expected = DataFrame(
  205. {
  206. 0: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1),
  207. 1: Series([pd.NaT, pd.NaT]).dt.tz_localize(tz2),
  208. }
  209. )
  210. result = concat([first, second], axis=1)
  211. tm.assert_frame_equal(result, expected)
  212. @pytest.mark.parametrize("tz1", [None, "UTC"])
  213. @pytest.mark.parametrize("tz2", [None, "UTC"])
  214. def test_concat_NaT_series_dataframe_all_NaT(self, tz1, tz2):
  215. # GH 12396
  216. # tz-naive
  217. first = Series([pd.NaT, pd.NaT]).dt.tz_localize(tz1)
  218. second = DataFrame(
  219. [
  220. [Timestamp("2015/01/01", tz=tz2)],
  221. [Timestamp("2016/01/01", tz=tz2)],
  222. ],
  223. index=[2, 3],
  224. )
  225. expected = DataFrame(
  226. [
  227. pd.NaT,
  228. pd.NaT,
  229. Timestamp("2015/01/01", tz=tz2),
  230. Timestamp("2016/01/01", tz=tz2),
  231. ]
  232. )
  233. if tz1 != tz2:
  234. expected = expected.astype(object)
  235. result = concat([first, second])
  236. tm.assert_frame_equal(result, expected)
  237. class TestTimezoneConcat:
  238. def test_concat_tz_series(self):
  239. # gh-11755: tz and no tz
  240. x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
  241. y = Series(date_range("2012-01-01", "2012-01-02"))
  242. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  243. result = concat([x, y], ignore_index=True)
  244. tm.assert_series_equal(result, expected)
  245. # gh-11887: concat tz and object
  246. x = Series(date_range("20151124 08:00", "20151124 09:00", freq="1h", tz="UTC"))
  247. y = Series(["a", "b"])
  248. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  249. result = concat([x, y], ignore_index=True)
  250. tm.assert_series_equal(result, expected)
  251. # see gh-12217 and gh-12306
  252. # Concatenating two UTC times
  253. first = DataFrame([[datetime(2016, 1, 1)]])
  254. first[0] = first[0].dt.tz_localize("UTC")
  255. second = DataFrame([[datetime(2016, 1, 2)]])
  256. second[0] = second[0].dt.tz_localize("UTC")
  257. result = concat([first, second])
  258. assert result[0].dtype == "datetime64[ns, UTC]"
  259. # Concatenating two London times
  260. first = DataFrame([[datetime(2016, 1, 1)]])
  261. first[0] = first[0].dt.tz_localize("Europe/London")
  262. second = DataFrame([[datetime(2016, 1, 2)]])
  263. second[0] = second[0].dt.tz_localize("Europe/London")
  264. result = concat([first, second])
  265. assert result[0].dtype == "datetime64[ns, Europe/London]"
  266. # Concatenating 2+1 London times
  267. first = DataFrame([[datetime(2016, 1, 1)], [datetime(2016, 1, 2)]])
  268. first[0] = first[0].dt.tz_localize("Europe/London")
  269. second = DataFrame([[datetime(2016, 1, 3)]])
  270. second[0] = second[0].dt.tz_localize("Europe/London")
  271. result = concat([first, second])
  272. assert result[0].dtype == "datetime64[ns, Europe/London]"
  273. # Concat'ing 1+2 London times
  274. first = DataFrame([[datetime(2016, 1, 1)]])
  275. first[0] = first[0].dt.tz_localize("Europe/London")
  276. second = DataFrame([[datetime(2016, 1, 2)], [datetime(2016, 1, 3)]])
  277. second[0] = second[0].dt.tz_localize("Europe/London")
  278. result = concat([first, second])
  279. assert result[0].dtype == "datetime64[ns, Europe/London]"
  280. def test_concat_tz_series_tzlocal(self):
  281. # see gh-13583
  282. x = [
  283. Timestamp("2011-01-01", tz=dateutil.tz.tzlocal()),
  284. Timestamp("2011-02-01", tz=dateutil.tz.tzlocal()),
  285. ]
  286. y = [
  287. Timestamp("2012-01-01", tz=dateutil.tz.tzlocal()),
  288. Timestamp("2012-02-01", tz=dateutil.tz.tzlocal()),
  289. ]
  290. result = concat([Series(x), Series(y)], ignore_index=True)
  291. tm.assert_series_equal(result, Series(x + y))
  292. assert result.dtype == "datetime64[ns, tzlocal()]"
  293. def test_concat_tz_series_with_datetimelike(self):
  294. # see gh-12620: tz and timedelta
  295. x = [
  296. Timestamp("2011-01-01", tz="US/Eastern"),
  297. Timestamp("2011-02-01", tz="US/Eastern"),
  298. ]
  299. y = [pd.Timedelta("1 day"), pd.Timedelta("2 day")]
  300. result = concat([Series(x), Series(y)], ignore_index=True)
  301. tm.assert_series_equal(result, Series(x + y, dtype="object"))
  302. # tz and period
  303. y = [pd.Period("2011-03", freq="M"), pd.Period("2011-04", freq="M")]
  304. result = concat([Series(x), Series(y)], ignore_index=True)
  305. tm.assert_series_equal(result, Series(x + y, dtype="object"))
  306. def test_concat_tz_frame(self):
  307. df2 = DataFrame(
  308. {
  309. "A": Timestamp("20130102", tz="US/Eastern"),
  310. "B": Timestamp("20130603", tz="CET"),
  311. },
  312. index=range(5),
  313. )
  314. # concat
  315. df3 = concat([df2.A.to_frame(), df2.B.to_frame()], axis=1)
  316. tm.assert_frame_equal(df2, df3)
  317. def test_concat_multiple_tzs(self):
  318. # GH#12467
  319. # combining datetime tz-aware and naive DataFrames
  320. ts1 = Timestamp("2015-01-01", tz=None)
  321. ts2 = Timestamp("2015-01-01", tz="UTC")
  322. ts3 = Timestamp("2015-01-01", tz="EST")
  323. df1 = DataFrame({"time": [ts1]})
  324. df2 = DataFrame({"time": [ts2]})
  325. df3 = DataFrame({"time": [ts3]})
  326. results = concat([df1, df2]).reset_index(drop=True)
  327. expected = DataFrame({"time": [ts1, ts2]}, dtype=object)
  328. tm.assert_frame_equal(results, expected)
  329. results = concat([df1, df3]).reset_index(drop=True)
  330. expected = DataFrame({"time": [ts1, ts3]}, dtype=object)
  331. tm.assert_frame_equal(results, expected)
  332. results = concat([df2, df3]).reset_index(drop=True)
  333. expected = DataFrame({"time": [ts2, ts3]})
  334. tm.assert_frame_equal(results, expected)
  335. def test_concat_multiindex_with_tz(self):
  336. # GH 6606
  337. df = DataFrame(
  338. {
  339. "dt": [
  340. datetime(2014, 1, 1),
  341. datetime(2014, 1, 2),
  342. datetime(2014, 1, 3),
  343. ],
  344. "b": ["A", "B", "C"],
  345. "c": [1, 2, 3],
  346. "d": [4, 5, 6],
  347. }
  348. )
  349. df["dt"] = df["dt"].apply(lambda d: Timestamp(d, tz="US/Pacific"))
  350. df = df.set_index(["dt", "b"])
  351. exp_idx1 = DatetimeIndex(
  352. ["2014-01-01", "2014-01-02", "2014-01-03"] * 2, tz="US/Pacific", name="dt"
  353. )
  354. exp_idx2 = Index(["A", "B", "C"] * 2, name="b")
  355. exp_idx = MultiIndex.from_arrays([exp_idx1, exp_idx2])
  356. expected = DataFrame(
  357. {"c": [1, 2, 3] * 2, "d": [4, 5, 6] * 2}, index=exp_idx, columns=["c", "d"]
  358. )
  359. result = concat([df, df])
  360. tm.assert_frame_equal(result, expected)
  361. def test_concat_tz_not_aligned(self):
  362. # GH#22796
  363. ts = pd.to_datetime([1, 2]).tz_localize("UTC")
  364. a = DataFrame({"A": ts})
  365. b = DataFrame({"A": ts, "B": ts})
  366. result = concat([a, b], sort=True, ignore_index=True)
  367. expected = DataFrame(
  368. {"A": list(ts) + list(ts), "B": [pd.NaT, pd.NaT] + list(ts)}
  369. )
  370. tm.assert_frame_equal(result, expected)
  371. @pytest.mark.parametrize(
  372. "t1",
  373. [
  374. "2015-01-01",
  375. pytest.param(
  376. pd.NaT,
  377. marks=pytest.mark.xfail(
  378. reason="GH23037 incorrect dtype when concatenating"
  379. ),
  380. ),
  381. ],
  382. )
  383. def test_concat_tz_NaT(self, t1):
  384. # GH#22796
  385. # Concatenating tz-aware multicolumn DataFrames
  386. ts1 = Timestamp(t1, tz="UTC")
  387. ts2 = Timestamp("2015-01-01", tz="UTC")
  388. ts3 = Timestamp("2015-01-01", tz="UTC")
  389. df1 = DataFrame([[ts1, ts2]])
  390. df2 = DataFrame([[ts3]])
  391. result = concat([df1, df2])
  392. expected = DataFrame([[ts1, ts2], [ts3, pd.NaT]], index=[0, 0])
  393. tm.assert_frame_equal(result, expected)
  394. def test_concat_tz_with_empty(self):
  395. # GH 9188
  396. result = concat(
  397. [DataFrame(date_range("2000", periods=1, tz="UTC")), DataFrame()]
  398. )
  399. expected = DataFrame(date_range("2000", periods=1, tz="UTC"))
  400. tm.assert_frame_equal(result, expected)
  401. class TestPeriodConcat:
  402. def test_concat_period_series(self):
  403. x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
  404. y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="D"))
  405. expected = Series([x[0], x[1], y[0], y[1]], dtype="Period[D]")
  406. result = concat([x, y], ignore_index=True)
  407. tm.assert_series_equal(result, expected)
  408. def test_concat_period_multiple_freq_series(self):
  409. x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
  410. y = Series(pd.PeriodIndex(["2015-10-01", "2016-01-01"], freq="M"))
  411. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  412. result = concat([x, y], ignore_index=True)
  413. tm.assert_series_equal(result, expected)
  414. assert result.dtype == "object"
  415. def test_concat_period_other_series(self):
  416. x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
  417. y = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="M"))
  418. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  419. result = concat([x, y], ignore_index=True)
  420. tm.assert_series_equal(result, expected)
  421. assert result.dtype == "object"
  422. # non-period
  423. x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
  424. y = Series(DatetimeIndex(["2015-11-01", "2015-12-01"]))
  425. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  426. result = concat([x, y], ignore_index=True)
  427. tm.assert_series_equal(result, expected)
  428. assert result.dtype == "object"
  429. x = Series(pd.PeriodIndex(["2015-11-01", "2015-12-01"], freq="D"))
  430. y = Series(["A", "B"])
  431. expected = Series([x[0], x[1], y[0], y[1]], dtype="object")
  432. result = concat([x, y], ignore_index=True)
  433. tm.assert_series_equal(result, expected)
  434. assert result.dtype == "object"
  435. def test_concat_timedelta64_block():
  436. rng = to_timedelta(np.arange(10), unit="s")
  437. df = DataFrame({"time": rng})
  438. result = concat([df, df])
  439. tm.assert_frame_equal(result.iloc[:10], df)
  440. tm.assert_frame_equal(result.iloc[10:], df)
  441. def test_concat_multiindex_datetime_nat():
  442. # GH#44900
  443. left = DataFrame({"a": 1}, index=MultiIndex.from_tuples([(1, pd.NaT)]))
  444. right = DataFrame(
  445. {"b": 2}, index=MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])
  446. )
  447. result = concat([left, right], axis="columns")
  448. expected = DataFrame(
  449. {"a": [1.0, np.nan], "b": 2}, MultiIndex.from_tuples([(1, pd.NaT), (2, pd.NaT)])
  450. )
  451. tm.assert_frame_equal(result, expected)