test_timezones.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. from datetime import (
  2. date,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas._libs.tslibs.timezones import maybe_get_tz
  8. import pandas.util._test_decorators as td
  9. import pandas as pd
  10. from pandas import (
  11. DataFrame,
  12. DatetimeIndex,
  13. Series,
  14. Timestamp,
  15. date_range,
  16. )
  17. import pandas._testing as tm
  18. from pandas.tests.io.pytables.common import (
  19. _maybe_remove,
  20. ensure_clean_store,
  21. )
  22. def _compare_with_tz(a, b):
  23. tm.assert_frame_equal(a, b)
  24. # compare the zones on each element
  25. for c in a.columns:
  26. for i in a.index:
  27. a_e = a.loc[i, c]
  28. b_e = b.loc[i, c]
  29. if not (a_e == b_e and a_e.tz == b_e.tz):
  30. raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
  31. # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
  32. # filename issues.
  33. gettz_dateutil = lambda x: maybe_get_tz("dateutil/" + x)
  34. gettz_pytz = lambda x: x
  35. @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
  36. def test_append_with_timezones(setup_path, gettz):
  37. # as columns
  38. # Single-tzinfo, no DST transition
  39. df_est = DataFrame(
  40. {
  41. "A": [
  42. Timestamp("20130102 2:00:00", tz=gettz("US/Eastern"))
  43. + timedelta(hours=1) * i
  44. for i in range(5)
  45. ]
  46. }
  47. )
  48. # frame with all columns having same tzinfo, but different sides
  49. # of DST transition
  50. df_crosses_dst = DataFrame(
  51. {
  52. "A": Timestamp("20130102", tz=gettz("US/Eastern")),
  53. "B": Timestamp("20130603", tz=gettz("US/Eastern")),
  54. },
  55. index=range(5),
  56. )
  57. df_mixed_tz = DataFrame(
  58. {
  59. "A": Timestamp("20130102", tz=gettz("US/Eastern")),
  60. "B": Timestamp("20130102", tz=gettz("EET")),
  61. },
  62. index=range(5),
  63. )
  64. df_different_tz = DataFrame(
  65. {
  66. "A": Timestamp("20130102", tz=gettz("US/Eastern")),
  67. "B": Timestamp("20130102", tz=gettz("CET")),
  68. },
  69. index=range(5),
  70. )
  71. with ensure_clean_store(setup_path) as store:
  72. _maybe_remove(store, "df_tz")
  73. store.append("df_tz", df_est, data_columns=["A"])
  74. result = store["df_tz"]
  75. _compare_with_tz(result, df_est)
  76. tm.assert_frame_equal(result, df_est)
  77. # select with tz aware
  78. expected = df_est[df_est.A >= df_est.A[3]]
  79. result = store.select("df_tz", where="A>=df_est.A[3]")
  80. _compare_with_tz(result, expected)
  81. # ensure we include dates in DST and STD time here.
  82. _maybe_remove(store, "df_tz")
  83. store.append("df_tz", df_crosses_dst)
  84. result = store["df_tz"]
  85. _compare_with_tz(result, df_crosses_dst)
  86. tm.assert_frame_equal(result, df_crosses_dst)
  87. msg = (
  88. r"invalid info for \[values_block_1\] for \[tz\], "
  89. r"existing_value \[(dateutil/.*)?US/Eastern\] "
  90. r"conflicts with new value \[(dateutil/.*)?EET\]"
  91. )
  92. with pytest.raises(ValueError, match=msg):
  93. store.append("df_tz", df_mixed_tz)
  94. # this is ok
  95. _maybe_remove(store, "df_tz")
  96. store.append("df_tz", df_mixed_tz, data_columns=["A", "B"])
  97. result = store["df_tz"]
  98. _compare_with_tz(result, df_mixed_tz)
  99. tm.assert_frame_equal(result, df_mixed_tz)
  100. # can't append with diff timezone
  101. msg = (
  102. r"invalid info for \[B\] for \[tz\], "
  103. r"existing_value \[(dateutil/.*)?EET\] "
  104. r"conflicts with new value \[(dateutil/.*)?CET\]"
  105. )
  106. with pytest.raises(ValueError, match=msg):
  107. store.append("df_tz", df_different_tz)
  108. @pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
  109. def test_append_with_timezones_as_index(setup_path, gettz):
  110. # GH#4098 example
  111. dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
  112. dti = dti._with_freq(None) # freq doesn't round-trip
  113. df = DataFrame({"A": Series(range(3), index=dti)})
  114. with ensure_clean_store(setup_path) as store:
  115. _maybe_remove(store, "df")
  116. store.put("df", df)
  117. result = store.select("df")
  118. tm.assert_frame_equal(result, df)
  119. _maybe_remove(store, "df")
  120. store.append("df", df)
  121. result = store.select("df")
  122. tm.assert_frame_equal(result, df)
  123. def test_roundtrip_tz_aware_index(setup_path):
  124. # GH 17618
  125. time = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
  126. df = DataFrame(data=[0], index=[time])
  127. with ensure_clean_store(setup_path) as store:
  128. store.put("frame", df, format="fixed")
  129. recons = store["frame"]
  130. tm.assert_frame_equal(recons, df)
  131. assert recons.index[0]._value == 946706400000000000
  132. def test_store_index_name_with_tz(setup_path):
  133. # GH 13884
  134. df = DataFrame({"A": [1, 2]})
  135. df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
  136. df.index = df.index.tz_localize("UTC")
  137. df.index.name = "foo"
  138. with ensure_clean_store(setup_path) as store:
  139. store.put("frame", df, format="table")
  140. recons = store["frame"]
  141. tm.assert_frame_equal(recons, df)
  142. def test_tseries_select_index_column(setup_path):
  143. # GH7777
  144. # selecting a UTC datetimeindex column did
  145. # not preserve UTC tzinfo set before storing
  146. # check that no tz still works
  147. rng = date_range("1/1/2000", "1/30/2000")
  148. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  149. with ensure_clean_store(setup_path) as store:
  150. store.append("frame", frame)
  151. result = store.select_column("frame", "index")
  152. assert rng.tz == DatetimeIndex(result.values).tz
  153. # check utc
  154. rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
  155. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  156. with ensure_clean_store(setup_path) as store:
  157. store.append("frame", frame)
  158. result = store.select_column("frame", "index")
  159. assert rng.tz == result.dt.tz
  160. # double check non-utc
  161. rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
  162. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  163. with ensure_clean_store(setup_path) as store:
  164. store.append("frame", frame)
  165. result = store.select_column("frame", "index")
  166. assert rng.tz == result.dt.tz
  167. def test_timezones_fixed_format_frame_non_empty(setup_path):
  168. with ensure_clean_store(setup_path) as store:
  169. # index
  170. rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
  171. rng = rng._with_freq(None) # freq doesn't round-trip
  172. df = DataFrame(np.random.randn(len(rng), 4), index=rng)
  173. store["df"] = df
  174. result = store["df"]
  175. tm.assert_frame_equal(result, df)
  176. # as data
  177. # GH11411
  178. _maybe_remove(store, "df")
  179. df = DataFrame(
  180. {
  181. "A": rng,
  182. "B": rng.tz_convert("UTC").tz_localize(None),
  183. "C": rng.tz_convert("CET"),
  184. "D": range(len(rng)),
  185. },
  186. index=rng,
  187. )
  188. store["df"] = df
  189. result = store["df"]
  190. tm.assert_frame_equal(result, df)
  191. def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
  192. # GH 20594
  193. dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)
  194. obj = Series(dtype=dtype, name="A")
  195. if frame_or_series is DataFrame:
  196. obj = obj.to_frame()
  197. with ensure_clean_store(setup_path) as store:
  198. store["obj"] = obj
  199. result = store["obj"]
  200. tm.assert_equal(result, obj)
  201. def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
  202. # GH 20594
  203. dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)
  204. with ensure_clean_store(setup_path) as store:
  205. s = Series([0], dtype=dtype)
  206. store["s"] = s
  207. result = store["s"]
  208. tm.assert_series_equal(result, s)
  209. def test_fixed_offset_tz(setup_path):
  210. rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
  211. frame = DataFrame(np.random.randn(len(rng), 4), index=rng)
  212. with ensure_clean_store(setup_path) as store:
  213. store["frame"] = frame
  214. recons = store["frame"]
  215. tm.assert_index_equal(recons.index, rng)
  216. assert rng.tz == recons.index.tz
  217. @td.skip_if_windows
  218. def test_store_timezone(setup_path):
  219. # GH2852
  220. # issue storing datetime.date with a timezone as it resets when read
  221. # back in a new timezone
  222. # original method
  223. with ensure_clean_store(setup_path) as store:
  224. today = date(2013, 9, 10)
  225. df = DataFrame([1, 2, 3], index=[today, today, today])
  226. store["obj1"] = df
  227. result = store["obj1"]
  228. tm.assert_frame_equal(result, df)
  229. # with tz setting
  230. with ensure_clean_store(setup_path) as store:
  231. with tm.set_timezone("EST5EDT"):
  232. today = date(2013, 9, 10)
  233. df = DataFrame([1, 2, 3], index=[today, today, today])
  234. store["obj1"] = df
  235. with tm.set_timezone("CST6CDT"):
  236. result = store["obj1"]
  237. tm.assert_frame_equal(result, df)
  238. def test_legacy_datetimetz_object(datapath):
  239. # legacy from < 0.17.0
  240. # 8260
  241. expected = DataFrame(
  242. {
  243. "A": Timestamp("20130102", tz="US/Eastern"),
  244. "B": Timestamp("20130603", tz="CET"),
  245. },
  246. index=range(5),
  247. )
  248. with ensure_clean_store(
  249. datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"
  250. ) as store:
  251. result = store["df"]
  252. tm.assert_frame_equal(result, expected)
  253. def test_dst_transitions(setup_path):
  254. # make sure we are not failing on transitions
  255. with ensure_clean_store(setup_path) as store:
  256. times = date_range(
  257. "2013-10-26 23:00",
  258. "2013-10-27 01:00",
  259. tz="Europe/London",
  260. freq="H",
  261. ambiguous="infer",
  262. )
  263. times = times._with_freq(None) # freq doesn't round-trip
  264. for i in [times, times + pd.Timedelta("10min")]:
  265. _maybe_remove(store, "df")
  266. df = DataFrame({"A": range(len(i)), "B": i}, index=i)
  267. store.append("df", df)
  268. result = store.select("df")
  269. tm.assert_frame_equal(result, df)
  270. def test_read_with_where_tz_aware_index(tmp_path, setup_path):
  271. # GH 11926
  272. periods = 10
  273. dts = date_range("20151201", periods=periods, freq="D", tz="UTC")
  274. mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])
  275. expected = DataFrame({"MYCOL": 0}, index=mi)
  276. key = "mykey"
  277. path = tmp_path / setup_path
  278. with pd.HDFStore(path) as store:
  279. store.append(key, expected, format="table", append=True)
  280. result = pd.read_hdf(path, key, where="DATE > 20151130")
  281. tm.assert_frame_equal(result, expected)
  282. def test_py2_created_with_datetimez(datapath):
  283. # The test HDF5 file was created in Python 2, but could not be read in
  284. # Python 3.
  285. #
  286. # GH26443
  287. index = [Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]
  288. expected = DataFrame({"data": 123}, index=index)
  289. with ensure_clean_store(
  290. datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"
  291. ) as store:
  292. result = store["key"]
  293. tm.assert_frame_equal(result, expected)