tzconversion.pyx 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. """
  2. timezone conversion
  3. """
  4. cimport cython
  5. from cpython.datetime cimport (
  6. PyDelta_Check,
  7. datetime,
  8. datetime_new,
  9. import_datetime,
  10. timedelta,
  11. tzinfo,
  12. )
  13. from cython cimport Py_ssize_t
  14. import_datetime()
  15. import numpy as np
  16. import pytz
  17. cimport numpy as cnp
  18. from numpy cimport (
  19. int64_t,
  20. intp_t,
  21. ndarray,
  22. uint8_t,
  23. )
  24. cnp.import_array()
  25. from pandas._libs.tslibs.dtypes cimport (
  26. periods_per_day,
  27. periods_per_second,
  28. )
  29. from pandas._libs.tslibs.nattype cimport NPY_NAT
  30. from pandas._libs.tslibs.np_datetime cimport (
  31. NPY_DATETIMEUNIT,
  32. npy_datetimestruct,
  33. pandas_datetime_to_datetimestruct,
  34. pydatetime_to_dt64,
  35. )
  36. from pandas._libs.tslibs.timezones cimport (
  37. get_dst_info,
  38. is_fixed_offset,
  39. is_tzlocal,
  40. is_utc,
  41. is_zoneinfo,
  42. utc_stdlib,
  43. )
  44. cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64)
  45. @cython.freelist(16)
  46. @cython.final
  47. cdef class Localizer:
  48. # cdef:
  49. # tzinfo tz
  50. # NPY_DATETIMEUNIT _creso
  51. # bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz
  52. # ndarray trans
  53. # Py_ssize_t ntrans
  54. # const int64_t[::1] deltas
  55. # int64_t delta
  56. # int64_t* tdata
  57. @cython.initializedcheck(False)
  58. @cython.boundscheck(False)
  59. def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT creso):
  60. self.tz = tz
  61. self._creso = creso
  62. self.use_utc = self.use_tzlocal = self.use_fixed = False
  63. self.use_dst = self.use_pytz = False
  64. self.ntrans = -1 # placeholder
  65. self.delta = -1 # placeholder
  66. self.deltas = _deltas_placeholder
  67. self.tdata = NULL
  68. if is_utc(tz) or tz is None:
  69. self.use_utc = True
  70. elif is_tzlocal(tz) or is_zoneinfo(tz):
  71. self.use_tzlocal = True
  72. else:
  73. trans, deltas, typ = get_dst_info(tz)
  74. if creso != NPY_DATETIMEUNIT.NPY_FR_ns:
  75. # NB: using floordiv here is implicitly assuming we will
  76. # never see trans or deltas that are not an integer number
  77. # of seconds.
  78. # TODO: avoid these np.array calls
  79. if creso == NPY_DATETIMEUNIT.NPY_FR_us:
  80. trans = np.array(trans) // 1_000
  81. deltas = np.array(deltas) // 1_000
  82. elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
  83. trans = np.array(trans) // 1_000_000
  84. deltas = np.array(deltas) // 1_000_000
  85. elif creso == NPY_DATETIMEUNIT.NPY_FR_s:
  86. trans = np.array(trans) // 1_000_000_000
  87. deltas = np.array(deltas) // 1_000_000_000
  88. else:
  89. raise NotImplementedError(creso)
  90. self.trans = trans
  91. self.ntrans = self.trans.shape[0]
  92. self.deltas = deltas
  93. if typ != "pytz" and typ != "dateutil":
  94. # static/fixed; in this case we know that len(delta) == 1
  95. self.use_fixed = True
  96. self.delta = deltas[0]
  97. else:
  98. self.use_dst = True
  99. if typ == "pytz":
  100. self.use_pytz = True
  101. self.tdata = <int64_t*>cnp.PyArray_DATA(trans)
  102. @cython.boundscheck(False)
  103. cdef int64_t utc_val_to_local_val(
  104. self, int64_t utc_val, Py_ssize_t* pos, bint* fold=NULL
  105. ) except? -1:
  106. if self.use_utc:
  107. return utc_val
  108. elif self.use_tzlocal:
  109. return utc_val + _tz_localize_using_tzinfo_api(
  110. utc_val, self.tz, to_utc=False, creso=self._creso, fold=fold
  111. )
  112. elif self.use_fixed:
  113. return utc_val + self.delta
  114. else:
  115. pos[0] = bisect_right_i8(self.tdata, utc_val, self.ntrans) - 1
  116. if fold is not NULL:
  117. fold[0] = _infer_dateutil_fold(
  118. utc_val, self.trans, self.deltas, pos[0]
  119. )
  120. return utc_val + self.deltas[pos[0]]
  121. cdef int64_t tz_localize_to_utc_single(
  122. int64_t val,
  123. tzinfo tz,
  124. object ambiguous=None,
  125. object nonexistent=None,
  126. NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns,
  127. ) except? -1:
  128. """See tz_localize_to_utc.__doc__"""
  129. cdef:
  130. int64_t delta
  131. int64_t[::1] deltas
  132. if val == NPY_NAT:
  133. return val
  134. elif is_utc(tz) or tz is None:
  135. # TODO: test with non-nano
  136. return val
  137. elif is_tzlocal(tz):
  138. return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, creso=creso)
  139. elif is_fixed_offset(tz):
  140. _, deltas, _ = get_dst_info(tz)
  141. delta = deltas[0]
  142. # TODO: de-duplicate with Localizer.__init__
  143. if creso != NPY_DATETIMEUNIT.NPY_FR_ns:
  144. if creso == NPY_DATETIMEUNIT.NPY_FR_us:
  145. delta = delta // 1000
  146. elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
  147. delta = delta // 1_000_000
  148. elif creso == NPY_DATETIMEUNIT.NPY_FR_s:
  149. delta = delta // 1_000_000_000
  150. return val - delta
  151. else:
  152. return tz_localize_to_utc(
  153. np.array([val], dtype="i8"),
  154. tz,
  155. ambiguous=ambiguous,
  156. nonexistent=nonexistent,
  157. creso=creso,
  158. )[0]
  159. @cython.boundscheck(False)
  160. @cython.wraparound(False)
  161. def tz_localize_to_utc(
  162. ndarray[int64_t] vals,
  163. tzinfo tz,
  164. object ambiguous=None,
  165. object nonexistent=None,
  166. NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns,
  167. ):
  168. """
  169. Localize tzinfo-naive i8 to given time zone (using pytz). If
  170. there are ambiguities in the values, raise AmbiguousTimeError.
  171. Parameters
  172. ----------
  173. vals : ndarray[int64_t]
  174. tz : tzinfo or None
  175. ambiguous : str, bool, or arraylike
  176. When clocks moved backward due to DST, ambiguous times may arise.
  177. For example in Central European Time (UTC+01), when going from 03:00
  178. DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
  179. and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
  180. dictates how ambiguous times should be handled.
  181. - 'infer' will attempt to infer fall dst-transition hours based on
  182. order
  183. - bool-ndarray where True signifies a DST time, False signifies a
  184. non-DST time (note that this flag is only applicable for ambiguous
  185. times, but the array must have the same length as vals)
  186. - bool if True, treat all vals as DST. If False, treat them as non-DST
  187. - 'NaT' will return NaT where there are ambiguous times
  188. nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \
  189. timedelta-like}
  190. How to handle non-existent times when converting wall times to UTC
  191. creso : NPY_DATETIMEUNIT, default NPY_FR_ns
  192. Returns
  193. -------
  194. localized : ndarray[int64_t]
  195. """
  196. cdef:
  197. ndarray[uint8_t, cast=True] ambiguous_array
  198. Py_ssize_t i, n = vals.shape[0]
  199. Py_ssize_t delta_idx_offset, delta_idx
  200. int64_t v, left, right, val, new_local, remaining_mins
  201. int64_t first_delta, delta
  202. int64_t shift_delta = 0
  203. ndarray[int64_t] result_a, result_b, dst_hours
  204. int64_t[::1] result
  205. bint is_zi = False
  206. bint infer_dst = False, is_dst = False, fill = False
  207. bint shift_forward = False, shift_backward = False
  208. bint fill_nonexist = False
  209. str stamp
  210. Localizer info = Localizer(tz, creso=creso)
  211. int64_t pph = periods_per_day(creso) // 24
  212. int64_t pps = periods_per_second(creso)
  213. npy_datetimestruct dts
  214. # Vectorized version of DstTzInfo.localize
  215. if info.use_utc:
  216. return vals.copy()
  217. # silence false-positive compiler warning
  218. ambiguous_array = np.empty(0, dtype=bool)
  219. if isinstance(ambiguous, str):
  220. if ambiguous == "infer":
  221. infer_dst = True
  222. elif ambiguous == "NaT":
  223. fill = True
  224. elif isinstance(ambiguous, bool):
  225. is_dst = True
  226. if ambiguous:
  227. ambiguous_array = np.ones(len(vals), dtype=bool)
  228. else:
  229. ambiguous_array = np.zeros(len(vals), dtype=bool)
  230. elif hasattr(ambiguous, "__iter__"):
  231. is_dst = True
  232. if len(ambiguous) != len(vals):
  233. raise ValueError("Length of ambiguous bool-array must be "
  234. "the same size as vals")
  235. ambiguous_array = np.asarray(ambiguous, dtype=bool)
  236. if nonexistent == "NaT":
  237. fill_nonexist = True
  238. elif nonexistent == "shift_forward":
  239. shift_forward = True
  240. elif nonexistent == "shift_backward":
  241. shift_backward = True
  242. elif PyDelta_Check(nonexistent):
  243. from .timedeltas import delta_to_nanoseconds
  244. shift_delta = delta_to_nanoseconds(nonexistent, reso=creso)
  245. elif nonexistent not in ("raise", None):
  246. msg = ("nonexistent must be one of {'NaT', 'raise', 'shift_forward', "
  247. "shift_backwards} or a timedelta object")
  248. raise ValueError(msg)
  249. result = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
  250. if info.use_tzlocal and not is_zoneinfo(tz):
  251. for i in range(n):
  252. v = vals[i]
  253. if v == NPY_NAT:
  254. result[i] = NPY_NAT
  255. else:
  256. result[i] = v - _tz_localize_using_tzinfo_api(
  257. v, tz, to_utc=True, creso=creso
  258. )
  259. return result.base # to return underlying ndarray
  260. elif info.use_fixed:
  261. delta = info.delta
  262. for i in range(n):
  263. v = vals[i]
  264. if v == NPY_NAT:
  265. result[i] = NPY_NAT
  266. else:
  267. result[i] = v - delta
  268. return result.base # to return underlying ndarray
  269. # Determine whether each date lies left of the DST transition (store in
  270. # result_a) or right of the DST transition (store in result_b)
  271. if is_zoneinfo(tz):
  272. is_zi = True
  273. result_a, result_b =_get_utc_bounds_zoneinfo(
  274. vals, tz, creso=creso
  275. )
  276. else:
  277. result_a, result_b =_get_utc_bounds(
  278. vals, info.tdata, info.ntrans, info.deltas, creso=creso
  279. )
  280. # silence false-positive compiler warning
  281. dst_hours = np.empty(0, dtype=np.int64)
  282. if infer_dst:
  283. dst_hours = _get_dst_hours(vals, result_a, result_b, creso=creso)
  284. # Pre-compute delta_idx_offset that will be used if we go down non-existent
  285. # paths.
  286. # Shift the delta_idx by if the UTC offset of
  287. # the target tz is greater than 0 and we're moving forward
  288. # or vice versa
  289. first_delta = info.deltas[0]
  290. if (shift_forward or shift_delta > 0) and first_delta > 0:
  291. delta_idx_offset = 1
  292. elif (shift_backward or shift_delta < 0) and first_delta < 0:
  293. delta_idx_offset = 1
  294. else:
  295. delta_idx_offset = 0
  296. for i in range(n):
  297. val = vals[i]
  298. left = result_a[i]
  299. right = result_b[i]
  300. if val == NPY_NAT:
  301. # TODO: test with non-nano
  302. result[i] = val
  303. elif left != NPY_NAT and right != NPY_NAT:
  304. if left == right:
  305. # TODO: test with non-nano
  306. result[i] = left
  307. else:
  308. if infer_dst and dst_hours[i] != NPY_NAT:
  309. # TODO: test with non-nano
  310. result[i] = dst_hours[i]
  311. elif is_dst:
  312. if ambiguous_array[i]:
  313. result[i] = left
  314. else:
  315. result[i] = right
  316. elif fill:
  317. # TODO: test with non-nano; parametrize test_dt_round_tz_ambiguous
  318. result[i] = NPY_NAT
  319. else:
  320. stamp = _render_tstamp(val, creso=creso)
  321. raise pytz.AmbiguousTimeError(
  322. f"Cannot infer dst time from {stamp}, try using the "
  323. "'ambiguous' argument"
  324. )
  325. elif left != NPY_NAT:
  326. result[i] = left
  327. elif right != NPY_NAT:
  328. # TODO: test with non-nano
  329. result[i] = right
  330. else:
  331. # Handle nonexistent times
  332. if shift_forward or shift_backward or shift_delta != 0:
  333. # Shift the nonexistent time to the closest existing time
  334. remaining_mins = val % pph
  335. if shift_delta != 0:
  336. # Validate that we don't relocalize on another nonexistent
  337. # time
  338. if -1 < shift_delta + remaining_mins < pph:
  339. raise ValueError(
  340. "The provided timedelta will relocalize on a "
  341. f"nonexistent time: {nonexistent}"
  342. )
  343. new_local = val + shift_delta
  344. elif shift_forward:
  345. new_local = val + (pph - remaining_mins)
  346. else:
  347. # Subtract 1 since the beginning hour is _inclusive_ of
  348. # nonexistent times
  349. new_local = val - remaining_mins - 1
  350. if is_zi:
  351. # use the same construction as in _get_utc_bounds_zoneinfo
  352. pandas_datetime_to_datetimestruct(new_local, creso, &dts)
  353. extra = (dts.ps // 1000) * (pps // 1_000_000_000)
  354. dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
  355. dts.min, dts.sec, dts.us, None)
  356. if shift_forward or shift_delta > 0:
  357. dt = dt.replace(tzinfo=tz, fold=1)
  358. else:
  359. dt = dt.replace(tzinfo=tz, fold=0)
  360. dt = dt.astimezone(utc_stdlib)
  361. dt = dt.replace(tzinfo=None)
  362. result[i] = pydatetime_to_dt64(dt, &dts, creso) + extra
  363. else:
  364. delta_idx = bisect_right_i8(info.tdata, new_local, info.ntrans)
  365. delta_idx = delta_idx - delta_idx_offset
  366. result[i] = new_local - info.deltas[delta_idx]
  367. elif fill_nonexist:
  368. result[i] = NPY_NAT
  369. else:
  370. stamp = _render_tstamp(val, creso=creso)
  371. raise pytz.NonExistentTimeError(stamp)
  372. return result.base # .base to get underlying ndarray
  373. cdef Py_ssize_t bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
  374. # Caller is responsible for checking n > 0
  375. # This looks very similar to local_search_right in the ndarray.searchsorted
  376. # implementation.
  377. cdef:
  378. Py_ssize_t pivot, left = 0, right = n
  379. # edge cases
  380. if val > data[n - 1]:
  381. return n
  382. # Caller is responsible for ensuring 'val >= data[0]'. This is
  383. # ensured by the fact that 'data' comes from get_dst_info where data[0]
  384. # is *always* NPY_NAT+1. If that ever changes, we will need to restore
  385. # the following disabled check.
  386. # if val < data[0]:
  387. # return 0
  388. while left < right:
  389. pivot = left + (right - left) // 2
  390. if data[pivot] <= val:
  391. left = pivot + 1
  392. else:
  393. right = pivot
  394. return left
  395. cdef str _render_tstamp(int64_t val, NPY_DATETIMEUNIT creso):
  396. """ Helper function to render exception messages"""
  397. from pandas._libs.tslibs.timestamps import Timestamp
  398. ts = Timestamp._from_value_and_reso(val, creso, None)
  399. return str(ts)
  400. cdef _get_utc_bounds(
  401. ndarray vals,
  402. int64_t* tdata,
  403. Py_ssize_t ntrans,
  404. const int64_t[::1] deltas,
  405. NPY_DATETIMEUNIT creso,
  406. ):
  407. # Determine whether each date lies left of the DST transition (store in
  408. # result_a) or right of the DST transition (store in result_b)
  409. cdef:
  410. ndarray result_a, result_b
  411. Py_ssize_t i, n = vals.size
  412. int64_t val, v_left, v_right
  413. Py_ssize_t isl, isr, pos_left, pos_right
  414. int64_t ppd = periods_per_day(creso)
  415. result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
  416. result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
  417. for i in range(n):
  418. # This loops resembles the "Find the two best possibilities" block
  419. # in pytz's DstTZInfo.localize method.
  420. result_a[i] = NPY_NAT
  421. result_b[i] = NPY_NAT
  422. val = vals[i]
  423. if val == NPY_NAT:
  424. continue
  425. # TODO: be careful of overflow in val-ppd
  426. isl = bisect_right_i8(tdata, val - ppd, ntrans) - 1
  427. if isl < 0:
  428. isl = 0
  429. v_left = val - deltas[isl]
  430. pos_left = bisect_right_i8(tdata, v_left, ntrans) - 1
  431. # timestamp falls to the left side of the DST transition
  432. if v_left + deltas[pos_left] == val:
  433. result_a[i] = v_left
  434. # TODO: be careful of overflow in val+ppd
  435. isr = bisect_right_i8(tdata, val + ppd, ntrans) - 1
  436. if isr < 0:
  437. isr = 0
  438. v_right = val - deltas[isr]
  439. pos_right = bisect_right_i8(tdata, v_right, ntrans) - 1
  440. # timestamp falls to the right side of the DST transition
  441. if v_right + deltas[pos_right] == val:
  442. result_b[i] = v_right
  443. return result_a, result_b
  444. cdef _get_utc_bounds_zoneinfo(ndarray vals, tz, NPY_DATETIMEUNIT creso):
  445. """
  446. For each point in 'vals', find the UTC time that it corresponds to if
  447. with fold=0 and fold=1. In non-ambiguous cases, these will match.
  448. Parameters
  449. ----------
  450. vals : ndarray[int64_t]
  451. tz : ZoneInfo
  452. creso : NPY_DATETIMEUNIT
  453. Returns
  454. -------
  455. ndarray[int64_t]
  456. ndarray[int64_t]
  457. """
  458. cdef:
  459. Py_ssize_t i, n = vals.size
  460. npy_datetimestruct dts
  461. datetime dt, rt, left, right, aware, as_utc
  462. int64_t val, pps = periods_per_second(creso)
  463. ndarray result_a, result_b
  464. result_a = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
  465. result_b = cnp.PyArray_EMPTY(vals.ndim, vals.shape, cnp.NPY_INT64, 0)
  466. for i in range(n):
  467. val = vals[i]
  468. if val == NPY_NAT:
  469. result_a[i] = NPY_NAT
  470. result_b[i] = NPY_NAT
  471. continue
  472. pandas_datetime_to_datetimestruct(val, creso, &dts)
  473. # casting to pydatetime drops nanoseconds etc, which we will
  474. # need to re-add later as 'extra'
  475. extra = (dts.ps // 1000) * (pps // 1_000_000_000)
  476. dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
  477. dts.min, dts.sec, dts.us, None)
  478. aware = dt.replace(tzinfo=tz)
  479. as_utc = aware.astimezone(utc_stdlib)
  480. rt = as_utc.astimezone(tz)
  481. if aware != rt:
  482. # AFAICT this means that 'aware' is non-existent
  483. # TODO: better way to check this?
  484. # mail.python.org/archives/list/datetime-sig@python.org/
  485. # thread/57Y3IQAASJOKHX4D27W463XTZIS2NR3M/
  486. result_a[i] = NPY_NAT
  487. else:
  488. left = as_utc.replace(tzinfo=None)
  489. result_a[i] = pydatetime_to_dt64(left, &dts, creso) + extra
  490. aware = dt.replace(fold=1, tzinfo=tz)
  491. as_utc = aware.astimezone(utc_stdlib)
  492. rt = as_utc.astimezone(tz)
  493. if aware != rt:
  494. result_b[i] = NPY_NAT
  495. else:
  496. right = as_utc.replace(tzinfo=None)
  497. result_b[i] = pydatetime_to_dt64(right, &dts, creso) + extra
  498. return result_a, result_b
  499. @cython.boundscheck(False)
  500. cdef ndarray[int64_t] _get_dst_hours(
  501. # vals, creso only needed here to potential render an exception message
  502. const int64_t[:] vals,
  503. ndarray[int64_t] result_a,
  504. ndarray[int64_t] result_b,
  505. NPY_DATETIMEUNIT creso,
  506. ):
  507. cdef:
  508. Py_ssize_t i, n = vals.shape[0]
  509. ndarray[uint8_t, cast=True] mismatch
  510. ndarray[int64_t] delta, dst_hours
  511. ndarray[intp_t] switch_idxs, trans_idx, grp, a_idx, b_idx, one_diff
  512. list trans_grp
  513. intp_t switch_idx
  514. int64_t left, right
  515. dst_hours = cnp.PyArray_EMPTY(result_a.ndim, result_a.shape, cnp.NPY_INT64, 0)
  516. dst_hours[:] = NPY_NAT
  517. mismatch = cnp.PyArray_ZEROS(result_a.ndim, result_a.shape, cnp.NPY_BOOL, 0)
  518. for i in range(n):
  519. left = result_a[i]
  520. right = result_b[i]
  521. # Get the ambiguous hours (given the above, these are the hours
  522. # where result_a != result_b and neither of them are NAT)
  523. if left != right and left != NPY_NAT and right != NPY_NAT:
  524. mismatch[i] = 1
  525. trans_idx = mismatch.nonzero()[0]
  526. if trans_idx.size == 1:
  527. # see test_tz_localize_to_utc_ambiguous_infer
  528. stamp = _render_tstamp(vals[trans_idx[0]], creso=creso)
  529. raise pytz.AmbiguousTimeError(
  530. f"Cannot infer dst time from {stamp} as there "
  531. "are no repeated times"
  532. )
  533. # Split the array into contiguous chunks (where the difference between
  534. # indices is 1). These are effectively dst transitions in different
  535. # years which is useful for checking that there is not an ambiguous
  536. # transition in an individual year.
  537. if trans_idx.size > 0:
  538. one_diff = np.where(np.diff(trans_idx) != 1)[0] + 1
  539. trans_grp = np.array_split(trans_idx, one_diff)
  540. # Iterate through each day, if there are no hours where the
  541. # delta is negative (indicates a repeat of hour) the switch
  542. # cannot be inferred
  543. for grp in trans_grp:
  544. delta = np.diff(result_a[grp])
  545. if grp.size == 1 or np.all(delta > 0):
  546. # see test_tz_localize_to_utc_ambiguous_infer
  547. stamp = _render_tstamp(vals[grp[0]], creso=creso)
  548. raise pytz.AmbiguousTimeError(stamp)
  549. # Find the index for the switch and pull from a for dst and b
  550. # for standard
  551. switch_idxs = (delta <= 0).nonzero()[0]
  552. if switch_idxs.size > 1:
  553. # see test_tz_localize_to_utc_ambiguous_infer
  554. raise pytz.AmbiguousTimeError(
  555. f"There are {switch_idxs.size} dst switches when "
  556. "there should only be 1."
  557. )
  558. switch_idx = switch_idxs[0] + 1
  559. # Pull the only index and adjust
  560. a_idx = grp[:switch_idx]
  561. b_idx = grp[switch_idx:]
  562. dst_hours[grp] = np.hstack((result_a[a_idx], result_b[b_idx]))
  563. return dst_hours
  564. # ----------------------------------------------------------------------
  565. # Timezone Conversion
  566. cpdef int64_t tz_convert_from_utc_single(
  567. int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns
  568. ) except? -1:
  569. """
  570. Convert the val (in i8) from UTC to tz
  571. This is a single value version of tz_convert_from_utc.
  572. Parameters
  573. ----------
  574. utc_val : int64
  575. tz : tzinfo
  576. creso : NPY_DATETIMEUNIT, default NPY_FR_ns
  577. Returns
  578. -------
  579. converted: int64
  580. """
  581. cdef:
  582. Localizer info = Localizer(tz, creso=creso)
  583. Py_ssize_t pos
  584. # Note: caller is responsible for ensuring utc_val != NPY_NAT
  585. return info.utc_val_to_local_val(utc_val, &pos)
  586. # OSError may be thrown by tzlocal on windows at or close to 1970-01-01
  587. # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241
  588. cdef int64_t _tz_localize_using_tzinfo_api(
  589. int64_t val,
  590. tzinfo tz,
  591. bint to_utc=True,
  592. NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns,
  593. bint* fold=NULL,
  594. ) except? -1:
  595. """
  596. Convert the i8 representation of a datetime from a general-case timezone to
  597. UTC, or vice-versa using the datetime/tzinfo API.
  598. Private, not intended for use outside of tslibs.tzconversion.
  599. Parameters
  600. ----------
  601. val : int64_t
  602. tz : tzinfo
  603. to_utc : bint
  604. True if converting _to_ UTC, False if going the other direction.
  605. creso : NPY_DATETIMEUNIT
  606. fold : bint*, default NULL
  607. pointer to fold: whether datetime ends up in a fold or not
  608. after adjustment.
  609. Only passed with to_utc=False.
  610. Returns
  611. -------
  612. delta : int64_t
  613. Value to add when converting from utc, subtract when converting to utc.
  614. Notes
  615. -----
  616. Sets fold by pointer
  617. """
  618. cdef:
  619. npy_datetimestruct dts
  620. datetime dt
  621. int64_t delta
  622. timedelta td
  623. int64_t pps = periods_per_second(creso)
  624. pandas_datetime_to_datetimestruct(val, creso, &dts)
  625. # datetime_new is cython-optimized constructor
  626. if not to_utc:
  627. # tz.utcoffset only makes sense if datetime
  628. # is _wall time_, so if val is a UTC timestamp convert to wall time
  629. dt = _astimezone(dts, tz)
  630. if fold is not NULL:
  631. # NB: fold is only passed with to_utc=False
  632. fold[0] = dt.fold
  633. else:
  634. dt = datetime_new(dts.year, dts.month, dts.day, dts.hour,
  635. dts.min, dts.sec, dts.us, None)
  636. td = tz.utcoffset(dt)
  637. delta = int(td.total_seconds() * pps)
  638. return delta
  639. cdef datetime _astimezone(npy_datetimestruct dts, tzinfo tz):
  640. """
  641. Optimized equivalent to:
  642. dt = datetime(dts.year, dts.month, dts.day, dts.hour,
  643. dts.min, dts.sec, dts.us, utc_stdlib)
  644. dt = dt.astimezone(tz)
  645. Derived from the datetime.astimezone implementation at
  646. https://github.com/python/cpython/blob/main/Modules/_datetimemodule.c#L6187
  647. NB: we are assuming tz is not None.
  648. """
  649. cdef:
  650. datetime result
  651. result = datetime_new(dts.year, dts.month, dts.day, dts.hour,
  652. dts.min, dts.sec, dts.us, tz)
  653. return tz.fromutc(result)
  654. # NB: relies on dateutil internals, subject to change.
  655. @cython.boundscheck(False)
  656. @cython.wraparound(False)
  657. cdef bint _infer_dateutil_fold(
  658. int64_t value,
  659. const int64_t[::1] trans,
  660. const int64_t[::1] deltas,
  661. Py_ssize_t pos,
  662. ):
  663. """
  664. Infer _TSObject fold property from value by assuming 0 and then setting
  665. to 1 if necessary.
  666. Parameters
  667. ----------
  668. value : int64_t
  669. trans : ndarray[int64_t]
  670. ndarray of offset transition points in nanoseconds since epoch.
  671. deltas : int64_t[:]
  672. array of offsets corresponding to transition points in trans.
  673. pos : Py_ssize_t
  674. Position of the last transition point before taking fold into account.
  675. Returns
  676. -------
  677. bint
  678. Due to daylight saving time, one wall clock time can occur twice
  679. when shifting from summer to winter time; fold describes whether the
  680. datetime-like corresponds to the first (0) or the second time (1)
  681. the wall clock hits the ambiguous time
  682. References
  683. ----------
  684. .. [1] "PEP 495 - Local Time Disambiguation"
  685. https://www.python.org/dev/peps/pep-0495/#the-fold-attribute
  686. """
  687. cdef:
  688. bint fold = 0
  689. int64_t fold_delta
  690. if pos > 0:
  691. fold_delta = deltas[pos - 1] - deltas[pos]
  692. if value - fold_delta < trans[pos]:
  693. fold = 1
  694. return fold