123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448 |
- from datetime import (
- timedelta,
- timezone,
- )
- from pandas.compat._optional import import_optional_dependency
- try:
- # py39+
- import zoneinfo
- from zoneinfo import ZoneInfo
- except ImportError:
- zoneinfo = None
- ZoneInfo = None
- from cpython.datetime cimport (
- datetime,
- timedelta,
- tzinfo,
- )
- # dateutil compat
- from dateutil.tz import (
- gettz as dateutil_gettz,
- tzfile as _dateutil_tzfile,
- tzlocal as _dateutil_tzlocal,
- tzutc as _dateutil_tzutc,
- )
- import numpy as np
- import pytz
- from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
- cimport numpy as cnp
- from numpy cimport int64_t
- cnp.import_array()
- # ----------------------------------------------------------------------
- from pandas._libs.tslibs.util cimport (
- get_nat,
- is_integer_object,
- )
# Integer sentinel returned by get_nat(); the code below uses NPY_NAT + 1 as
# the placeholder "first transition" time in the arrays built by get_dst_info.
cdef int64_t NPY_NAT = get_nat()

# UTC objects from each supported timezone library.  is_utc() compares
# against these by identity.
cdef tzinfo utc_stdlib = timezone.utc
cdef tzinfo utc_pytz = pytz.utc
cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC")  # NB: *not* the same as tzutc()
# Starts as None and is filled in lazily by is_utc_zoneinfo().
cdef tzinfo utc_zoneinfo = None
- # ----------------------------------------------------------------------
cdef bint is_utc_zoneinfo(tzinfo tz):
    """
    Check whether ``tz`` is the module's cached ``ZoneInfo("UTC")`` object.

    The cached object is created lazily on the first call that gets past the
    initial guard.  This is a workaround for cases with missing tzdata, see
    https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025

    Returns False when ``tz`` is None, when the zoneinfo module could not be
    imported, or when ``ZoneInfo("UTC")`` cannot be constructed.
    """
    global utc_zoneinfo

    if tz is None or zoneinfo is None:
        return False

    if utc_zoneinfo is not None:
        # Already initialized by an earlier call: plain identity check.
        return tz is utc_zoneinfo

    try:
        utc_zoneinfo = ZoneInfo("UTC")
    except zoneinfo.ZoneInfoNotFoundError:
        return False

    # Warn if tzdata is too old, even if there is a system tzdata to alert
    # users about the mismatch between local/system tzdata
    import_optional_dependency("tzdata", errors="warn", min_version="2022.1")
    return tz is utc_zoneinfo
cpdef inline bint is_utc(tzinfo tz):
    """
    Return True if ``tz`` is one of the recognized UTC objects.

    Recognized are the pytz and stdlib UTC singletons, any dateutil ``tzutc``
    instance, the object returned by ``dateutil.tz.gettz("UTC")``, and the
    cached ``ZoneInfo("UTC")`` object.
    """
    # The checks run in the same order as a single short-circuiting
    # ``or`` chain: cheap identity tests first, zoneinfo last.
    if tz is utc_pytz or tz is utc_stdlib:
        return True
    if isinstance(tz, _dateutil_tzutc) or tz is utc_dateutil_str:
        return True
    return is_utc_zoneinfo(tz)
cdef bint is_zoneinfo(tzinfo tz):
    """
    Return True if ``tz`` is a ``ZoneInfo`` instance.

    Always False when ``ZoneInfo`` is None, i.e. the zoneinfo import at the
    top of the module failed.
    """
    return ZoneInfo is not None and isinstance(tz, ZoneInfo)
cdef bint is_tzlocal(tzinfo tz):
    # True when tz is an instance of dateutil's tzlocal class.
    return isinstance(tz, _dateutil_tzlocal)
cdef bint treat_tz_as_pytz(tzinfo tz):
    """
    Duck-type check: True if ``tz`` has both the ``_utc_transition_times``
    and ``_transition_info`` attributes.
    """
    if not hasattr(tz, "_utc_transition_times"):
        return False
    return hasattr(tz, "_transition_info")
cdef bint treat_tz_as_dateutil(tzinfo tz):
    """
    Duck-type check: True if ``tz`` has both the ``_trans_list`` and
    ``_trans_idx`` attributes.
    """
    if not hasattr(tz, "_trans_list"):
        return False
    return hasattr(tz, "_trans_idx")
# Returns str or tzinfo object
cpdef inline object get_timezone(tzinfo tz):
    """
    Return a serializable identifier for ``tz`` where one is available.

    The goals are to:

    1) Distinguish between pytz and dateutil timezones
    2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone*
       but a different tz object)
    3) Provide something to serialize when we're storing a datetime object
       in pytables.

    For a dateutil-style tz the result is the string ``"dateutil/"`` followed
    by the tz's ``_filename``.  Otherwise it is the ``zone`` attribute when
    that exists and is not None.  UTC objects, and objects without a usable
    ``zone``, are returned unchanged.  maybe_get_tz (below) is the inverse of
    this process.

    Raises
    ------
    TypeError
        If ``tz`` is None.
    ValueError
        If a dateutil-style tz has ".tar.gz" in its ``_filename``.
    """
    if tz is None:
        raise TypeError("tz argument cannot be None")

    if is_utc(tz):
        return tz

    if treat_tz_as_dateutil(tz):
        if ".tar.gz" in tz._filename:
            raise ValueError(
                "Bad tz filename. Dateutil on python 3 on windows has a "
                "bug which causes tzfile._filename to be the same for all "
                "timezone files. Please construct dateutil timezones "
                'implicitly by passing a string like "dateutil/Europe'
                '/London" when you construct your pandas objects instead '
                "of passing a timezone object. See "
                "https://github.com/pandas-dev/pandas/pull/7362")
        return "dateutil/" + tz._filename

    # tz is a pytz timezone or unknown: prefer its zone name, and fall back
    # to the object itself when the attribute is missing or None.
    zone_name = getattr(tz, "zone", None)
    return tz if zone_name is None else zone_name
cpdef inline tzinfo maybe_get_tz(object tz):
    """
    (Maybe) Construct a timezone object from a string or integer.

    Parameters
    ----------
    tz : str, int, tzinfo or None
        * ``"tzlocal()"`` gives a dateutil ``tzlocal`` instance.
        * ``"dateutil/<zone>"`` is looked up with ``dateutil.tz.gettz``.
        * Strings starting with ``+``/``-`` or ``UTC+``/``UTC-`` are parsed
          as a fixed offset written as ``+HH:MM`` and become a
          ``datetime.timezone``.
        * ``"UTC"`` / ``"utc"`` give the stdlib UTC singleton.
        * Any other string is passed to ``pytz.timezone``.
        * An integer is taken as a UTC offset in seconds.
        * tzinfo objects and None are returned unchanged.

    Returns
    -------
    tzinfo or None

    Raises
    ------
    TypeError
        If ``tz`` is not one of the supported types.

    Notes
    -----
    Prefixes are tested with ``str.startswith`` rather than by indexing
    ``tz[0]``, so an empty string no longer fails with a bare IndexError.
    It now reaches ``pytz.timezone``, which reports it like any other
    unrecognized zone name.
    """
    if isinstance(tz, str):
        if tz == "tzlocal()":
            tz = _dateutil_tzlocal()
        elif tz.startswith("dateutil/"):
            zone = tz[9:]
            tz = dateutil_gettz(zone)
            # On Python 3 on Windows, the filename is not always set correctly.
            if isinstance(tz, _dateutil_tzfile) and ".tar.gz" in tz._filename:
                tz._filename = zone
        elif tz.startswith(("-", "+")):
            # "+HH:MM" / "-HH:MM": the sign is re-applied to the minutes so
            # that e.g. "-05:30" yields -5h and -30min.
            hours = int(tz[0:3])
            minutes = int(tz[0] + tz[4:6])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        elif tz.startswith(("UTC-", "UTC+")):
            # Same layout as above, shifted by the 3-character "UTC" prefix.
            hours = int(tz[3:6])
            minutes = int(tz[3] + tz[7:9])
            tz = timezone(timedelta(hours=hours, minutes=minutes))
        elif tz == "UTC" or tz == "utc":
            tz = utc_stdlib
        else:
            tz = pytz.timezone(tz)
    elif is_integer_object(tz):
        tz = timezone(timedelta(seconds=tz))
    elif isinstance(tz, tzinfo):
        pass
    elif tz is None:
        pass
    else:
        raise TypeError(type(tz))
    return tz
def _p_tz_cache_key(tz: tzinfo):
    """
    Python interface for cache function to facilitate testing.

    Simply forwards ``tz`` to the cdef function ``tz_cache_key`` and returns
    its result.
    """
    return tz_cache_key(tz)
# Timezone data caches, key is the pytz string or dateutil file name
# (i.e. the value returned by tz_cache_key).  Populated by get_dst_info with
# (trans, deltas, typ) tuples.
dst_cache = {}
cdef object tz_cache_key(tzinfo tz):
    """
    Return the key in the cache for the timezone info object or None
    if unknown.

    For pytz timezones the key is the ``zone`` attribute; for dateutil
    ``tzfile`` timezones it is ``"dateutil"`` followed by the ``_filename``
    attribute.  Every other tzinfo gives None.

    Notes
    -----
    This cannot just be the hash of a timezone object. Unfortunately, the
    hashes of two dateutil tz objects which represent the same timezone are
    not equal (even though the tz objects will compare equal and represent
    the same tz file). Also, pytz objects are not always hashable.

    Raises
    ------
    ValueError
        If a dateutil ``tzfile`` has ".tar.gz" in its ``_filename``.
    """
    if isinstance(tz, _pytz_BaseTzInfo):
        return tz.zone

    if not isinstance(tz, _dateutil_tzfile):
        return None

    filename = tz._filename
    if ".tar.gz" in filename:
        raise ValueError("Bad tz filename. Dateutil on python 3 on "
                         "windows has a bug which causes tzfile._filename "
                         "to be the same for all timezone files. Please "
                         "construct dateutil timezones implicitly by "
                         'passing a string like "dateutil/Europe/London" '
                         "when you construct your pandas objects instead "
                         "of passing a timezone object. See "
                         "https://github.com/pandas-dev/pandas/pull/7362")
    return "dateutil" + filename
- # ----------------------------------------------------------------------
- # UTC Offsets
cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
    """
    Return the UTC offset of ``tz``.

    The ``_utcoffset`` attribute is used when ``tz`` has one; otherwise the
    result of ``tz.utcoffset(obj)`` is returned.
    """
    try:
        offset = tz._utcoffset
    except AttributeError:
        offset = tz.utcoffset(obj)
    return offset
cpdef inline bint is_fixed_offset(tzinfo tz):
    """
    Return whether ``tz`` is treated as having a single, fixed UTC offset.

    dateutil-style and pytz-style objects count as fixed only when both of
    their transition containers are empty.  ZoneInfo objects are never
    treated as fixed.  Anything else is treated as fixed.
    """
    if treat_tz_as_dateutil(tz):
        return len(tz._trans_idx) == 0 and len(tz._trans_list) == 0

    if treat_tz_as_pytz(tz):
        return (len(tz._transition_info) == 0
                and len(tz._utc_transition_times) == 0)

    if is_zoneinfo(tz):
        return False

    # This also implicitly accepts datetime.timezone objects which are
    # considered fixed
    return True
cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
    """
    Convert the transition times of a dateutil timezone to UTC.

    Transition times in dateutil timezones are stored in local non-dst
    time. This is the reverse of the code in dateutil.tz.tzfile.__init__:
    each entry of ``tz._trans_list`` has the offset of the most recent
    non-DST entry of ``tz._trans_idx`` (0 until one is seen) subtracted.

    Returns
    -------
    list
        A new list; ``tz._trans_list`` itself is not modified.
    """
    utc_trans = list(tz._trans_list)
    std_offset = 0
    pos = 0
    for local_trans, ttinfo in zip(tz._trans_list, tz._trans_idx):
        if not ttinfo.isdst:
            std_offset = ttinfo.offset
        utc_trans[pos] = local_trans - std_offset
        pos += 1
    return utc_trans
cdef int64_t[::1] unbox_utcoffsets(object transinfo):
    """
    Build an int64 array of UTC offsets in nanoseconds.

    For every entry of ``transinfo`` the first element's ``total_seconds()``
    is converted with ``int()`` and multiplied by 1_000_000_000.
    """
    cdef:
        Py_ssize_t pos, n
        int64_t[::1] offsets_ns

    n = len(transinfo)
    offsets_ns = np.empty(n, dtype="i8")
    for pos in range(n):
        utcoffset = transinfo[pos][0]
        offsets_ns[pos] = int(utcoffset.total_seconds()) * 1_000_000_000
    return offsets_ns
- # ----------------------------------------------------------------------
- # Daylight Savings
cdef object get_dst_info(tzinfo tz):
    """
    Return the transition times and UTC offsets for ``tz``.

    Results are cached in the module-level ``dst_cache`` under the key
    returned by ``tz_cache_key(tz)``.  When that key is None nothing is
    cached and the result is computed from ``get_utcoffset(tz, None)``.

    Returns
    -------
    ndarray[int64_t]
        Nanosecond UTC times of DST transitions.
    ndarray[int64_t]
        Nanosecond UTC offsets corresponding to DST transitions.
    str
        Describing the type of tzinfo object; one of "unknown", "pytz",
        "dateutil", "fixed" or "static".

    Raises
    ------
    AssertionError
        If a dateutil-style tz has an empty ``_trans_list`` but is not
        a fixed offset.
    """
    cache_key = tz_cache_key(tz)
    if cache_key is None:
        # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
        # psycopg2.tz.FixedOffsetTimezone
        # If we have e.g. ZoneInfo here, the get_utcoffset call will return None,
        #  so the total_seconds() call will raise AttributeError.
        num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
        return (np.array([NPY_NAT + 1], dtype=np.int64),
                np.array([num], dtype=np.int64),
                "unknown")

    if cache_key not in dst_cache:
        if treat_tz_as_pytz(tz):
            trans = np.array(tz._utc_transition_times, dtype="M8[ns]")
            trans = trans.view("i8")
            # A first transition in year 1 is replaced by the NPY_NAT + 1
            # placeholder, matching the other branches.
            if tz._utc_transition_times[0].year == 1:
                trans[0] = NPY_NAT + 1
            deltas = unbox_utcoffsets(tz._transition_info)
            typ = "pytz"

        elif treat_tz_as_dateutil(tz):
            if len(tz._trans_list):
                # get utc trans times
                trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
                trans = np.hstack([
                    np.array([0], dtype="M8[s]"),  # place holder for 1st item
                    np.array(trans_list, dtype="M8[s]")]).astype(
                    "M8[ns]")  # all trans listed
                trans = trans.view("i8")
                trans[0] = NPY_NAT + 1

                # deltas: the offset before the first transition followed by
                # the offset of each transition, converted to nanoseconds
                deltas = np.array([v.offset for v in (
                    tz._ttinfo_before,) + tz._trans_idx], dtype="i8")
                deltas *= 1_000_000_000
                typ = "dateutil"

            elif is_fixed_offset(tz):
                trans = np.array([NPY_NAT + 1], dtype=np.int64)
                deltas = np.array([tz._ttinfo_std.offset],
                                  dtype="i8") * 1_000_000_000
                typ = "fixed"
            else:
                # 2018-07-12 this is not reached in the tests, and this case
                # is not handled in any of the functions that call
                # get_dst_info.  If this case _were_ hit the calling
                # functions would then hit an IndexError because they assume
                # `deltas` is non-empty.
                # (under the just-deleted code that returned empty arrays)
                raise AssertionError("dateutil tzinfo is not a FixedOffset "
                                     "and has an empty `_trans_list`.", tz)
        else:
            # static tzinfo, we can get here with pytz.StaticTZInfo
            #  which are not caught by treat_tz_as_pytz
            trans = np.array([NPY_NAT + 1], dtype=np.int64)
            num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
            deltas = np.array([num], dtype=np.int64)
            typ = "static"

        dst_cache[cache_key] = (trans, deltas, typ)

    return dst_cache[cache_key]
def infer_tzinfo(datetime start, datetime end):
    """
    Return the tzinfo shared by ``start`` and ``end``.

    Either argument may be None, in which case the other one's ``tzinfo`` is
    returned; None is returned when both are None.

    Raises
    ------
    AssertionError
        If both are given and ``tz_compare`` considers their timezones
        different.
    """
    if start is None:
        return None if end is None else end.tzinfo

    tz = start.tzinfo
    if end is not None and not tz_compare(tz, end.tzinfo):
        raise AssertionError(f"Inputs must both have the same timezone, "
                             f"{tz} != {end.tzinfo}")
    return tz
cpdef bint tz_compare(tzinfo start, tzinfo end):
    """
    Compare two timezones by their ``get_timezone`` representation.

    The same timezone can be represented as different instances of
    timezones.  For example
    `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
    `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
    timezones but aren't evaluated such, but the string representation
    for both of these is `'Europe/Paris'`.

    This exists only to add a notion of equality to pytz-style zones
    that is compatible with the notion of equality expected of tzinfo
    subclasses.

    Parameters
    ----------
    start : tzinfo
    end : tzinfo

    Returns
    -------
    bool
    """
    # GH 18523
    start_is_utc = is_utc(start)
    end_is_utc = is_utc(end)
    if start_is_utc or end_is_utc:
        # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent.
        # When only one side is UTC the answer is False, which ensures we
        # don't treat tzlocal as equal to UTC when running in UTC.
        return start_is_utc and end_is_utc

    if start is None or end is None:
        return start is None and end is None

    return get_timezone(start) == get_timezone(end)
def tz_standardize(tz: tzinfo) -> tzinfo:
    """
    If the passed tz is a pytz timezone object, "normalize" it to a
    consistent version by looking it up again through ``pytz.timezone``.

    Parameters
    ----------
    tz : tzinfo

    Returns
    -------
    tzinfo
        The re-created pytz timezone, or ``tz`` itself when it is not
        a pytz-style object.

    Examples
    --------
    >>> from datetime import datetime
    >>> import pytz
    >>> tz = pytz.timezone('US/Pacific').normalize(
    ...     datetime(2014, 1, 1, tzinfo=pytz.utc)
    ... ).tzinfo
    >>> tz
    <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>

    >>> tz = pytz.timezone('US/Pacific')
    >>> tz
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
    >>> tz_standardize(tz)
    <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
    """
    if not treat_tz_as_pytz(tz):
        return tz
    return pytz.timezone(str(tz))
|