123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364 |
- from datetime import (
- date,
- timedelta,
- )
- import numpy as np
- import pytest
- from pandas._libs.tslibs.timezones import maybe_get_tz
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import (
- DataFrame,
- DatetimeIndex,
- Series,
- Timestamp,
- date_range,
- )
- import pandas._testing as tm
- from pandas.tests.io.pytables.common import (
- _maybe_remove,
- ensure_clean_store,
- )
- def _compare_with_tz(a, b):
- tm.assert_frame_equal(a, b)
- # compare the zones on each element
- for c in a.columns:
- for i in a.index:
- a_e = a.loc[i, c]
- b_e = b.loc[i, c]
- if not (a_e == b_e and a_e.tz == b_e.tz):
- raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")
# use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
# filename issues.
#
# These are plain functions rather than lambdas bound to names (PEP 8 E731):
# they carry a real ``__name__``, so tracebacks and pytest parametrization
# ids show ``gettz_dateutil`` / ``gettz_pytz`` instead of ``<lambda>``.
def gettz_dateutil(x):
    """Return ``maybe_get_tz("dateutil/" + x)`` for the zone name ``x``."""
    return maybe_get_tz("dateutil/" + x)


def gettz_pytz(x):
    """
    Return the zone name ``x`` unchanged.

    The tests pass the returned string straight to ``tz=`` arguments.
    """
    return x
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones(setup_path, gettz):
    """
    Append frames with tz-aware columns and check that the zones round-trip.

    Also checks that appending a frame whose column tz conflicts with the tz
    already recorded for that column raises ``ValueError``.
    """
    eastern = gettz("US/Eastern")

    def two_column_frame(a_value, b_value):
        # five identical rows, one scalar timestamp broadcast per column
        return DataFrame({"A": a_value, "B": b_value}, index=range(5))

    # as columns

    # Single-tzinfo, no DST transition
    first = Timestamp("20130102 2:00:00", tz=eastern)
    df_est = DataFrame({"A": [first + timedelta(hours=1) * i for i in range(5)]})

    # frame with all columns having same tzinfo, but different sides
    # of DST transition
    df_crosses_dst = two_column_frame(
        Timestamp("20130102", tz=eastern),
        Timestamp("20130603", tz=eastern),
    )

    # two columns holding the same wall time in different zones
    df_mixed_tz = two_column_frame(
        Timestamp("20130102", tz=eastern),
        Timestamp("20130102", tz=gettz("EET")),
    )

    # like df_mixed_tz, but column B is CET rather than EET
    df_different_tz = two_column_frame(
        Timestamp("20130102", tz=eastern),
        Timestamp("20130102", tz=gettz("CET")),
    )

    with ensure_clean_store(setup_path) as store:
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_est, data_columns=["A"])
        stored = store["df_tz"]
        _compare_with_tz(stored, df_est)
        tm.assert_frame_equal(stored, df_est)

        # select with tz aware
        # NOTE: the where string refers to the local ``df_est`` by name, so
        # that variable must keep its name.
        threshold = df_est.A[3]
        expected = df_est[df_est.A >= threshold]
        selected = store.select("df_tz", where="A>=df_est.A[3]")
        _compare_with_tz(selected, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_crosses_dst)
        stored = store["df_tz"]
        _compare_with_tz(stored, df_crosses_dst)
        tm.assert_frame_equal(stored, df_crosses_dst)

        # appending the mixed-tz frame onto the existing table must fail
        mixed_msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?US/Eastern\] "
            r"conflicts with new value \[(dateutil/.*)?EET\]"
        )
        with pytest.raises(ValueError, match=mixed_msg):
            store.append("df_tz", df_mixed_tz)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df_mixed_tz, data_columns=["A", "B"])
        stored = store["df_tz"]
        _compare_with_tz(stored, df_mixed_tz)
        tm.assert_frame_equal(stored, df_mixed_tz)

        # can't append with diff timezone
        different_msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[(dateutil/.*)?EET\] "
            r"conflicts with new value \[(dateutil/.*)?CET\]"
        )
        with pytest.raises(ValueError, match=different_msg):
            store.append("df_tz", df_different_tz)
@pytest.mark.parametrize("gettz", [gettz_dateutil, gettz_pytz])
def test_append_with_timezones_as_index(setup_path, gettz):
    """
    Round-trip a frame with a tz-aware DatetimeIndex via ``put`` and ``append``.
    """
    # GH#4098 example
    index = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
    index = index._with_freq(None)  # freq doesn't round-trip
    column = Series(range(3), index=index)
    df = DataFrame({"A": column})

    with ensure_clean_store(setup_path) as store:
        # same check for both ways of writing: first put, then append
        for write in (store.put, store.append):
            _maybe_remove(store, "df")
            write("df", df)
            tm.assert_frame_equal(store.select("df"), df)
def test_roundtrip_tz_aware_index(setup_path):
    """
    Round-trip a single-row frame with a tz-aware index through fixed format.
    """
    # GH 17618
    stamp = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    df = DataFrame(data=[0], index=[stamp])

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)

    # 2000-01-01 06:00:00 UTC expressed as nanoseconds since the epoch
    first_label = recons.index[0]
    assert first_label._value == 946706400000000000
def test_store_index_name_with_tz(setup_path):
    """
    Round-trip a frame whose UTC-localized index carries a name (table format).
    """
    # GH 13884
    naive = DatetimeIndex([1234567890123456787, 1234567890123456788])
    named_utc = naive.tz_localize("UTC").rename("foo")
    df = DataFrame({"A": [1, 2]}, index=named_utc)

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        tm.assert_frame_equal(store["frame"], df)
def test_tseries_select_index_column(setup_path):
    """
    Check that ``select_column(..., "index")`` keeps the tz of the stored index.

    Three indexes are exercised in turn: tz-naive, UTC and US/Eastern.
    """
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    naive = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(np.random.randn(len(naive), 4), index=naive)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        index_col = store.select_column("frame", "index")
        assert naive.tz == DatetimeIndex(index_col.values).tz

    # check utc
    utc = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(np.random.randn(len(utc), 4), index=utc)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        index_col = store.select_column("frame", "index")
        assert utc.tz == index_col.dt.tz

    # double check non-utc
    eastern = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(np.random.randn(len(eastern), 4), index=eastern)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        index_col = store.select_column("frame", "index")
        assert eastern.tz == index_col.dt.tz
def test_timezones_fixed_format_frame_non_empty(setup_path):
    """
    Round-trip non-empty frames holding tz-aware data through ``store[key]``.

    The tz-aware values appear first only as the index, then also as columns.
    """
    with ensure_clean_store(setup_path) as store:
        # index
        eastern = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
        eastern = eastern._with_freq(None)  # freq doesn't round-trip
        df = DataFrame(np.random.randn(len(eastern), 4), index=eastern)
        store["df"] = df
        tm.assert_frame_equal(store["df"], df)

        # as data
        # GH11411
        _maybe_remove(store, "df")
        columns = {
            "A": eastern,
            "B": eastern.tz_convert("UTC").tz_localize(None),
            "C": eastern.tz_convert("CET"),
            "D": range(len(eastern)),
        }
        df = DataFrame(columns, index=eastern)
        store["df"] = df
        tm.assert_frame_equal(store["df"], df)
def test_timezones_fixed_format_empty(setup_path, tz_aware_fixture, frame_or_series):
    """
    Round-trip an empty tz-aware Series, or its one-column frame, via the store.
    """
    # GH 20594
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)
    empty = Series(dtype=tz_dtype, name="A")
    obj = empty.to_frame() if frame_or_series is DataFrame else empty

    with ensure_clean_store(setup_path) as store:
        store["obj"] = obj
        tm.assert_equal(store["obj"], obj)
def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):
    """
    Round-trip a one-element tz-aware Series through ``store[key]``.
    """
    # GH 20594
    tz_dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)

    with ensure_clean_store(setup_path) as store:
        ser = Series([0], dtype=tz_dtype)
        store["s"] = ser
        tm.assert_series_equal(store["s"], ser)
def test_fixed_offset_tz(setup_path):
    """
    Round-trip a frame whose index uses a fixed ``-07:00`` UTC offset.
    """
    index = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")
    n_rows = len(index)
    frame = DataFrame(np.random.randn(n_rows, 4), index=index)

    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        restored_index = store["frame"].index
        tm.assert_index_equal(restored_index, index)
        assert index.tz == restored_index.tz
@td.skip_if_windows
def test_store_timezone(setup_path):
    """
    Round-trip a frame indexed by ``datetime.date`` objects.

    The second half writes under one process timezone and reads under another.
    """
    # GH2852
    # issue storing datetime.date with a timezone as it resets when read
    # back in a new timezone

    # original method
    with ensure_clean_store(setup_path) as store:
        day = date(2013, 9, 10)
        df = DataFrame([1, 2, 3], index=[day] * 3)
        store["obj1"] = df
        tm.assert_frame_equal(store["obj1"], df)

    # with tz setting
    with ensure_clean_store(setup_path) as store:
        # write while the timezone is set to EST5EDT ...
        with tm.set_timezone("EST5EDT"):
            day = date(2013, 9, 10)
            df = DataFrame([1, 2, 3], index=[day] * 3)
            store["obj1"] = df

        # ... and read back while it is set to CST6CDT
        with tm.set_timezone("CST6CDT"):
            result = store["obj1"]

        tm.assert_frame_equal(result, df)
def test_legacy_datetimetz_object(datapath):
    """
    Read the legacy ``datetimetz_object.h5`` file and compare with a fresh frame.
    """
    # legacy from < 0.17.0
    # 8260
    columns = {
        "A": Timestamp("20130102", tz="US/Eastern"),
        "B": Timestamp("20130603", tz="CET"),
    }
    expected = DataFrame(columns, index=range(5))

    legacy_file = datapath("io", "data", "legacy_hdf", "datetimetz_object.h5")
    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["df"], expected)
def test_dst_transitions(setup_path):
    """
    Append and select frames whose Europe/London hourly index runs from
    2013-10-26 23:00 to 2013-10-27 01:00, created with ``ambiguous="infer"``.
    """
    # make sure we are not failing on transitions
    with ensure_clean_store(setup_path) as store:
        times = date_range(
            "2013-10-26 23:00",
            "2013-10-27 01:00",
            tz="Europe/London",
            freq="H",
            ambiguous="infer",
        )
        times = times._with_freq(None)  # freq doesn't round-trip
        shifted = times + pd.Timedelta("10min")

        # the same round trip for the on-the-hour index and the shifted one
        for index in (times, shifted):
            _maybe_remove(store, "df")
            df = DataFrame({"A": range(len(index)), "B": index}, index=index)
            store.append("df", df)
            tm.assert_frame_equal(store.select("df"), df)
def test_read_with_where_tz_aware_index(tmp_path, setup_path):
    """
    Filter with ``where`` on the tz-aware level of a MultiIndex in ``read_hdf``.
    """
    # GH 11926
    n_days = 10
    dates = date_range("20151201", periods=n_days, freq="D", tz="UTC")
    index = pd.MultiIndex.from_arrays([dates, range(n_days)], names=["DATE", "NO"])
    expected = DataFrame({"MYCOL": 0}, index=index)

    hdf_path = tmp_path / setup_path
    with pd.HDFStore(hdf_path) as store:
        store.append("mykey", expected, format="table", append=True)

    # every stored date is 2015-12-01 or later, so the whole frame is expected
    result = pd.read_hdf(hdf_path, "mykey", where="DATE > 20151130")
    tm.assert_frame_equal(result, expected)
def test_py2_created_with_datetimez(datapath):
    """
    Read ``gh26443.h5`` and compare it with a frame built from scratch.
    """
    # The test HDF5 file was created in Python 2, but could not be read in
    # Python 3.
    #
    # GH26443
    stamp = Timestamp("2019-01-01T18:00").tz_localize("America/New_York")
    expected = DataFrame({"data": 123}, index=[stamp])

    legacy_file = datapath("io", "data", "legacy_hdf", "gh26443.h5")
    with ensure_clean_store(legacy_file, mode="r") as store:
        tm.assert_frame_equal(store["key"], expected)
|