timezones.pyx 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. from datetime import (
  2. timedelta,
  3. timezone,
  4. )
  5. from pandas.compat._optional import import_optional_dependency
  6. try:
  7. # py39+
  8. import zoneinfo
  9. from zoneinfo import ZoneInfo
  10. except ImportError:
  11. zoneinfo = None
  12. ZoneInfo = None
  13. from cpython.datetime cimport (
  14. datetime,
  15. timedelta,
  16. tzinfo,
  17. )
  18. # dateutil compat
  19. from dateutil.tz import (
  20. gettz as dateutil_gettz,
  21. tzfile as _dateutil_tzfile,
  22. tzlocal as _dateutil_tzlocal,
  23. tzutc as _dateutil_tzutc,
  24. )
  25. import numpy as np
  26. import pytz
  27. from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo
  28. cimport numpy as cnp
  29. from numpy cimport int64_t
  30. cnp.import_array()
  31. # ----------------------------------------------------------------------
  32. from pandas._libs.tslibs.util cimport (
  33. get_nat,
  34. is_integer_object,
  35. )
  36. cdef int64_t NPY_NAT = get_nat()
  37. cdef tzinfo utc_stdlib = timezone.utc
  38. cdef tzinfo utc_pytz = pytz.utc
  39. cdef tzinfo utc_dateutil_str = dateutil_gettz("UTC") # NB: *not* the same as tzutc()
  40. cdef tzinfo utc_zoneinfo = None
  41. # ----------------------------------------------------------------------
  42. cdef bint is_utc_zoneinfo(tzinfo tz):
  43. # Workaround for cases with missing tzdata
  44. # https://github.com/pandas-dev/pandas/pull/46425#discussion_r830633025
  45. if tz is None or zoneinfo is None:
  46. return False
  47. global utc_zoneinfo
  48. if utc_zoneinfo is None:
  49. try:
  50. utc_zoneinfo = ZoneInfo("UTC")
  51. except zoneinfo.ZoneInfoNotFoundError:
  52. return False
  53. # Warn if tzdata is too old, even if there is a system tzdata to alert
  54. # users about the mismatch between local/system tzdata
  55. import_optional_dependency("tzdata", errors="warn", min_version="2022.1")
  56. return tz is utc_zoneinfo
  57. cpdef inline bint is_utc(tzinfo tz):
  58. return (
  59. tz is utc_pytz
  60. or tz is utc_stdlib
  61. or isinstance(tz, _dateutil_tzutc)
  62. or tz is utc_dateutil_str
  63. or is_utc_zoneinfo(tz)
  64. )
  65. cdef bint is_zoneinfo(tzinfo tz):
  66. if ZoneInfo is None:
  67. return False
  68. return isinstance(tz, ZoneInfo)
  69. cdef bint is_tzlocal(tzinfo tz):
  70. return isinstance(tz, _dateutil_tzlocal)
  71. cdef bint treat_tz_as_pytz(tzinfo tz):
  72. return (hasattr(tz, "_utc_transition_times") and
  73. hasattr(tz, "_transition_info"))
  74. cdef bint treat_tz_as_dateutil(tzinfo tz):
  75. return hasattr(tz, "_trans_list") and hasattr(tz, "_trans_idx")
  76. # Returns str or tzinfo object
  77. cpdef inline object get_timezone(tzinfo tz):
  78. """
  79. We need to do several things here:
  80. 1) Distinguish between pytz and dateutil timezones
  81. 2) Not be over-specific (e.g. US/Eastern with/without DST is same *zone*
  82. but a different tz object)
  83. 3) Provide something to serialize when we're storing a datetime object
  84. in pytables.
  85. We return a string prefaced with dateutil if it's a dateutil tz, else just
  86. the tz name. It needs to be a string so that we can serialize it with
  87. UJSON/pytables. maybe_get_tz (below) is the inverse of this process.
  88. """
  89. if tz is None:
  90. raise TypeError("tz argument cannot be None")
  91. if is_utc(tz):
  92. return tz
  93. else:
  94. if treat_tz_as_dateutil(tz):
  95. if ".tar.gz" in tz._filename:
  96. raise ValueError(
  97. "Bad tz filename. Dateutil on python 3 on windows has a "
  98. "bug which causes tzfile._filename to be the same for all "
  99. "timezone files. Please construct dateutil timezones "
  100. 'implicitly by passing a string like "dateutil/Europe'
  101. '/London" when you construct your pandas objects instead '
  102. "of passing a timezone object. See "
  103. "https://github.com/pandas-dev/pandas/pull/7362")
  104. return "dateutil/" + tz._filename
  105. else:
  106. # tz is a pytz timezone or unknown.
  107. try:
  108. zone = tz.zone
  109. if zone is None:
  110. return tz
  111. return zone
  112. except AttributeError:
  113. return tz
  114. cpdef inline tzinfo maybe_get_tz(object tz):
  115. """
  116. (Maybe) Construct a timezone object from a string. If tz is a string, use
  117. it to construct a timezone object. Otherwise, just return tz.
  118. """
  119. if isinstance(tz, str):
  120. if tz == "tzlocal()":
  121. tz = _dateutil_tzlocal()
  122. elif tz.startswith("dateutil/"):
  123. zone = tz[9:]
  124. tz = dateutil_gettz(zone)
  125. # On Python 3 on Windows, the filename is not always set correctly.
  126. if isinstance(tz, _dateutil_tzfile) and ".tar.gz" in tz._filename:
  127. tz._filename = zone
  128. elif tz[0] in {"-", "+"}:
  129. hours = int(tz[0:3])
  130. minutes = int(tz[0] + tz[4:6])
  131. tz = timezone(timedelta(hours=hours, minutes=minutes))
  132. elif tz[0:4] in {"UTC-", "UTC+"}:
  133. hours = int(tz[3:6])
  134. minutes = int(tz[3] + tz[7:9])
  135. tz = timezone(timedelta(hours=hours, minutes=minutes))
  136. elif tz == "UTC" or tz == "utc":
  137. tz = utc_stdlib
  138. else:
  139. tz = pytz.timezone(tz)
  140. elif is_integer_object(tz):
  141. tz = timezone(timedelta(seconds=tz))
  142. elif isinstance(tz, tzinfo):
  143. pass
  144. elif tz is None:
  145. pass
  146. else:
  147. raise TypeError(type(tz))
  148. return tz
  149. def _p_tz_cache_key(tz: tzinfo):
  150. """
  151. Python interface for cache function to facilitate testing.
  152. """
  153. return tz_cache_key(tz)
  154. # Timezone data caches, key is the pytz string or dateutil file name.
  155. dst_cache = {}
  156. cdef object tz_cache_key(tzinfo tz):
  157. """
  158. Return the key in the cache for the timezone info object or None
  159. if unknown.
  160. The key is currently the tz string for pytz timezones, the filename for
  161. dateutil timezones.
  162. Notes
  163. -----
  164. This cannot just be the hash of a timezone object. Unfortunately, the
  165. hashes of two dateutil tz objects which represent the same timezone are
  166. not equal (even though the tz objects will compare equal and represent
  167. the same tz file). Also, pytz objects are not always hashable so we use
  168. str(tz) instead.
  169. """
  170. if isinstance(tz, _pytz_BaseTzInfo):
  171. return tz.zone
  172. elif isinstance(tz, _dateutil_tzfile):
  173. if ".tar.gz" in tz._filename:
  174. raise ValueError("Bad tz filename. Dateutil on python 3 on "
  175. "windows has a bug which causes tzfile._filename "
  176. "to be the same for all timezone files. Please "
  177. "construct dateutil timezones implicitly by "
  178. 'passing a string like "dateutil/Europe/London" '
  179. "when you construct your pandas objects instead "
  180. "of passing a timezone object. See "
  181. "https://github.com/pandas-dev/pandas/pull/7362")
  182. return "dateutil" + tz._filename
  183. else:
  184. return None
  185. # ----------------------------------------------------------------------
  186. # UTC Offsets
  187. cdef timedelta get_utcoffset(tzinfo tz, datetime obj):
  188. try:
  189. return tz._utcoffset
  190. except AttributeError:
  191. return tz.utcoffset(obj)
  192. cpdef inline bint is_fixed_offset(tzinfo tz):
  193. if treat_tz_as_dateutil(tz):
  194. if len(tz._trans_idx) == 0 and len(tz._trans_list) == 0:
  195. return 1
  196. else:
  197. return 0
  198. elif treat_tz_as_pytz(tz):
  199. if (len(tz._transition_info) == 0
  200. and len(tz._utc_transition_times) == 0):
  201. return 1
  202. else:
  203. return 0
  204. elif is_zoneinfo(tz):
  205. return 0
  206. # This also implicitly accepts datetime.timezone objects which are
  207. # considered fixed
  208. return 1
  209. cdef object _get_utc_trans_times_from_dateutil_tz(tzinfo tz):
  210. """
  211. Transition times in dateutil timezones are stored in local non-dst
  212. time. This code converts them to UTC. It's the reverse of the code
  213. in dateutil.tz.tzfile.__init__.
  214. """
  215. new_trans = list(tz._trans_list)
  216. last_std_offset = 0
  217. for i, (trans, tti) in enumerate(zip(tz._trans_list, tz._trans_idx)):
  218. if not tti.isdst:
  219. last_std_offset = tti.offset
  220. new_trans[i] = trans - last_std_offset
  221. return new_trans
  222. cdef int64_t[::1] unbox_utcoffsets(object transinfo):
  223. cdef:
  224. Py_ssize_t i, sz
  225. int64_t[::1] arr
  226. sz = len(transinfo)
  227. arr = np.empty(sz, dtype="i8")
  228. for i in range(sz):
  229. arr[i] = int(transinfo[i][0].total_seconds()) * 1_000_000_000
  230. return arr
  231. # ----------------------------------------------------------------------
  232. # Daylight Savings
  233. cdef object get_dst_info(tzinfo tz):
  234. """
  235. Returns
  236. -------
  237. ndarray[int64_t]
  238. Nanosecond UTC times of DST transitions.
  239. ndarray[int64_t]
  240. Nanosecond UTC offsets corresponding to DST transitions.
  241. str
  242. Describing the type of tzinfo object.
  243. """
  244. cache_key = tz_cache_key(tz)
  245. if cache_key is None:
  246. # e.g. pytz.FixedOffset, matplotlib.dates._UTC,
  247. # psycopg2.tz.FixedOffsetTimezone
  248. num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
  249. # If we have e.g. ZoneInfo here, the get_utcoffset call will return None,
  250. # so the total_seconds() call will raise AttributeError.
  251. return (np.array([NPY_NAT + 1], dtype=np.int64),
  252. np.array([num], dtype=np.int64),
  253. "unknown")
  254. if cache_key not in dst_cache:
  255. if treat_tz_as_pytz(tz):
  256. trans = np.array(tz._utc_transition_times, dtype="M8[ns]")
  257. trans = trans.view("i8")
  258. if tz._utc_transition_times[0].year == 1:
  259. trans[0] = NPY_NAT + 1
  260. deltas = unbox_utcoffsets(tz._transition_info)
  261. typ = "pytz"
  262. elif treat_tz_as_dateutil(tz):
  263. if len(tz._trans_list):
  264. # get utc trans times
  265. trans_list = _get_utc_trans_times_from_dateutil_tz(tz)
  266. trans = np.hstack([
  267. np.array([0], dtype="M8[s]"), # place holder for 1st item
  268. np.array(trans_list, dtype="M8[s]")]).astype(
  269. "M8[ns]") # all trans listed
  270. trans = trans.view("i8")
  271. trans[0] = NPY_NAT + 1
  272. # deltas
  273. deltas = np.array([v.offset for v in (
  274. tz._ttinfo_before,) + tz._trans_idx], dtype="i8")
  275. deltas *= 1_000_000_000
  276. typ = "dateutil"
  277. elif is_fixed_offset(tz):
  278. trans = np.array([NPY_NAT + 1], dtype=np.int64)
  279. deltas = np.array([tz._ttinfo_std.offset],
  280. dtype="i8") * 1_000_000_000
  281. typ = "fixed"
  282. else:
  283. # 2018-07-12 this is not reached in the tests, and this case
  284. # is not handled in any of the functions that call
  285. # get_dst_info. If this case _were_ hit the calling
  286. # functions would then hit an IndexError because they assume
  287. # `deltas` is non-empty.
  288. # (under the just-deleted code that returned empty arrays)
  289. raise AssertionError("dateutil tzinfo is not a FixedOffset "
  290. "and has an empty `_trans_list`.", tz)
  291. else:
  292. # static tzinfo, we can get here with pytz.StaticTZInfo
  293. # which are not caught by treat_tz_as_pytz
  294. trans = np.array([NPY_NAT + 1], dtype=np.int64)
  295. num = int(get_utcoffset(tz, None).total_seconds()) * 1_000_000_000
  296. deltas = np.array([num], dtype=np.int64)
  297. typ = "static"
  298. dst_cache[cache_key] = (trans, deltas, typ)
  299. return dst_cache[cache_key]
  300. def infer_tzinfo(datetime start, datetime end):
  301. if start is not None and end is not None:
  302. tz = start.tzinfo
  303. if not tz_compare(tz, end.tzinfo):
  304. raise AssertionError(f"Inputs must both have the same timezone, "
  305. f"{tz} != {end.tzinfo}")
  306. elif start is not None:
  307. tz = start.tzinfo
  308. elif end is not None:
  309. tz = end.tzinfo
  310. else:
  311. tz = None
  312. return tz
  313. cpdef bint tz_compare(tzinfo start, tzinfo end):
  314. """
  315. Compare string representations of timezones
  316. The same timezone can be represented as different instances of
  317. timezones. For example
  318. `<DstTzInfo 'Europe/Paris' LMT+0:09:00 STD>` and
  319. `<DstTzInfo 'Europe/Paris' CET+1:00:00 STD>` are essentially same
  320. timezones but aren't evaluated such, but the string representation
  321. for both of these is `'Europe/Paris'`.
  322. This exists only to add a notion of equality to pytz-style zones
  323. that is compatible with the notion of equality expected of tzinfo
  324. subclasses.
  325. Parameters
  326. ----------
  327. start : tzinfo
  328. end : tzinfo
  329. Returns:
  330. -------
  331. bool
  332. """
  333. # GH 18523
  334. if is_utc(start):
  335. # GH#38851 consider pytz/dateutil/stdlib UTCs as equivalent
  336. return is_utc(end)
  337. elif is_utc(end):
  338. # Ensure we don't treat tzlocal as equal to UTC when running in UTC
  339. return False
  340. elif start is None or end is None:
  341. return start is None and end is None
  342. return get_timezone(start) == get_timezone(end)
  343. def tz_standardize(tz: tzinfo) -> tzinfo:
  344. """
  345. If the passed tz is a pytz timezone object, "normalize" it to the a
  346. consistent version
  347. Parameters
  348. ----------
  349. tz : tzinfo
  350. Returns
  351. -------
  352. tzinfo
  353. Examples
  354. --------
  355. >>> from datetime import datetime
  356. >>> from pytz import timezone
  357. >>> tz = timezone('US/Pacific').normalize(
  358. ... datetime(2014, 1, 1, tzinfo=pytz.utc)
  359. ... ).tzinfo
  360. >>> tz
  361. <DstTzInfo 'US/Pacific' PST-1 day, 16:00:00 STD>
  362. >>> tz_standardize(tz)
  363. <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
  364. >>> tz = timezone('US/Pacific')
  365. >>> tz
  366. <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
  367. >>> tz_standardize(tz)
  368. <DstTzInfo 'US/Pacific' LMT-1 day, 16:07:00 STD>
  369. """
  370. if treat_tz_as_pytz(tz):
  371. return pytz.timezone(str(tz))
  372. return tz