vectorized.pyx 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379
  1. cimport cython
  2. from cpython.datetime cimport (
  3. date,
  4. datetime,
  5. time,
  6. tzinfo,
  7. )
  8. import numpy as np
  9. cimport numpy as cnp
  10. from numpy cimport (
  11. int64_t,
  12. ndarray,
  13. )
  14. cnp.import_array()
  15. from .dtypes import Resolution
  16. from .dtypes cimport (
  17. c_Resolution,
  18. periods_per_day,
  19. )
  20. from .nattype cimport (
  21. NPY_NAT,
  22. c_NaT as NaT,
  23. )
  24. from .np_datetime cimport (
  25. NPY_DATETIMEUNIT,
  26. NPY_FR_ns,
  27. npy_datetimestruct,
  28. pandas_datetime_to_datetimestruct,
  29. )
  30. from .period cimport get_period_ordinal
  31. from .timestamps cimport create_timestamp_from_ts
  32. from .timezones cimport is_utc
  33. from .tzconversion cimport Localizer
  34. @cython.boundscheck(False)
  35. @cython.wraparound(False)
  36. def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns):
  37. # stamps is int64_t, arbitrary ndim
  38. """
  39. Convert the values (in i8) from UTC to tz
  40. Parameters
  41. ----------
  42. stamps : ndarray[int64]
  43. tz : tzinfo
  44. Returns
  45. -------
  46. ndarray[int64]
  47. """
  48. cdef:
  49. Localizer info = Localizer(tz, creso=reso)
  50. int64_t utc_val, local_val
  51. Py_ssize_t pos, i, n = stamps.size
  52. ndarray result
  53. cnp.broadcast mi
  54. if tz is None or is_utc(tz) or stamps.size == 0:
  55. # Much faster than going through the "standard" pattern below
  56. return stamps.copy()
  57. result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
  58. mi = cnp.PyArray_MultiIterNew2(result, stamps)
  59. for i in range(n):
  60. # Analogous to: utc_val = stamps[i]
  61. utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
  62. if utc_val == NPY_NAT:
  63. local_val = NPY_NAT
  64. else:
  65. local_val = info.utc_val_to_local_val(utc_val, &pos)
  66. # Analogous to: result[i] = local_val
  67. (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = local_val
  68. cnp.PyArray_MultiIter_NEXT(mi)
  69. return result
  70. # -------------------------------------------------------------------------
  71. @cython.wraparound(False)
  72. @cython.boundscheck(False)
  73. def ints_to_pydatetime(
  74. ndarray stamps,
  75. tzinfo tz=None,
  76. str box="datetime",
  77. NPY_DATETIMEUNIT reso=NPY_FR_ns,
  78. ) -> np.ndarray:
  79. # stamps is int64, arbitrary ndim
  80. """
  81. Convert an i8 repr to an ndarray of datetimes, date, time or Timestamp.
  82. Parameters
  83. ----------
  84. stamps : array of i8
  85. tz : str, optional
  86. convert to this timezone
  87. box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
  88. * If datetime, convert to datetime.datetime
  89. * If date, convert to datetime.date
  90. * If time, convert to datetime.time
  91. * If Timestamp, convert to pandas.Timestamp
  92. reso : NPY_DATETIMEUNIT, default NPY_FR_ns
  93. Returns
  94. -------
  95. ndarray[object] of type specified by box
  96. """
  97. cdef:
  98. Localizer info = Localizer(tz, creso=reso)
  99. int64_t utc_val, local_val
  100. Py_ssize_t i, n = stamps.size
  101. Py_ssize_t pos = -1 # unused, avoid not-initialized warning
  102. npy_datetimestruct dts
  103. tzinfo new_tz
  104. bint use_date = False, use_ts = False, use_pydt = False
  105. object res_val
  106. bint fold = 0
  107. # Note that `result` (and thus `result_flat`) is C-order and
  108. # `it` iterates C-order as well, so the iteration matches
  109. # See discussion at
  110. # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
  111. ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0)
  112. object[::1] res_flat = result.ravel() # should NOT be a copy
  113. cnp.flatiter it = cnp.PyArray_IterNew(stamps)
  114. if box == "date":
  115. assert (tz is None), "tz should be None when converting to date"
  116. use_date = True
  117. elif box == "timestamp":
  118. use_ts = True
  119. elif box == "datetime":
  120. use_pydt = True
  121. elif box != "time":
  122. raise ValueError(
  123. "box must be one of 'datetime', 'date', 'time' or 'timestamp'"
  124. )
  125. for i in range(n):
  126. # Analogous to: utc_val = stamps[i]
  127. utc_val = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]
  128. new_tz = tz
  129. if utc_val == NPY_NAT:
  130. res_val = <object>NaT
  131. else:
  132. local_val = info.utc_val_to_local_val(utc_val, &pos, &fold)
  133. if info.use_pytz:
  134. # find right representation of dst etc in pytz timezone
  135. new_tz = tz._tzinfos[tz._transition_info[pos]]
  136. pandas_datetime_to_datetimestruct(local_val, reso, &dts)
  137. if use_ts:
  138. res_val = create_timestamp_from_ts(
  139. utc_val, dts, new_tz, fold, reso=reso
  140. )
  141. elif use_pydt:
  142. res_val = datetime(
  143. dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
  144. new_tz, fold=fold,
  145. )
  146. elif use_date:
  147. res_val = date(dts.year, dts.month, dts.day)
  148. else:
  149. res_val = time(dts.hour, dts.min, dts.sec, dts.us, new_tz, fold=fold)
  150. # Note: we can index result directly instead of using PyArray_MultiIter_DATA
  151. # like we do for the other functions because result is known C-contiguous
  152. # and is the first argument to PyArray_MultiIterNew2. The usual pattern
  153. # does not seem to work with object dtype.
  154. # See discussion at
  155. # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
  156. res_flat[i] = res_val
  157. cnp.PyArray_ITER_NEXT(it)
  158. return result
  159. # -------------------------------------------------------------------------
  160. cdef c_Resolution _reso_stamp(npy_datetimestruct *dts):
  161. if dts.ps != 0:
  162. return c_Resolution.RESO_NS
  163. elif dts.us != 0:
  164. if dts.us % 1000 == 0:
  165. return c_Resolution.RESO_MS
  166. return c_Resolution.RESO_US
  167. elif dts.sec != 0:
  168. return c_Resolution.RESO_SEC
  169. elif dts.min != 0:
  170. return c_Resolution.RESO_MIN
  171. elif dts.hour != 0:
  172. return c_Resolution.RESO_HR
  173. return c_Resolution.RESO_DAY
  174. @cython.wraparound(False)
  175. @cython.boundscheck(False)
  176. def get_resolution(
  177. ndarray stamps, tzinfo tz=None, NPY_DATETIMEUNIT reso=NPY_FR_ns
  178. ) -> Resolution:
  179. # stamps is int64_t, any ndim
  180. cdef:
  181. Localizer info = Localizer(tz, creso=reso)
  182. int64_t utc_val, local_val
  183. Py_ssize_t i, n = stamps.size
  184. Py_ssize_t pos = -1 # unused, avoid not-initialized warning
  185. cnp.flatiter it = cnp.PyArray_IterNew(stamps)
  186. npy_datetimestruct dts
  187. c_Resolution pd_reso = c_Resolution.RESO_DAY, curr_reso
  188. for i in range(n):
  189. # Analogous to: utc_val = stamps[i]
  190. utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))
  191. if utc_val == NPY_NAT:
  192. pass
  193. else:
  194. local_val = info.utc_val_to_local_val(utc_val, &pos)
  195. pandas_datetime_to_datetimestruct(local_val, reso, &dts)
  196. curr_reso = _reso_stamp(&dts)
  197. if curr_reso < pd_reso:
  198. pd_reso = curr_reso
  199. cnp.PyArray_ITER_NEXT(it)
  200. return Resolution(pd_reso)
  201. # -------------------------------------------------------------------------
  202. @cython.cdivision(False)
  203. @cython.wraparound(False)
  204. @cython.boundscheck(False)
  205. cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso):
  206. # stamps is int64_t, arbitrary ndim
  207. """
  208. Normalize each of the (nanosecond) timezone aware timestamps in the given
  209. array by rounding down to the beginning of the day (i.e. midnight).
  210. This is midnight for timezone, `tz`.
  211. Parameters
  212. ----------
  213. stamps : int64 ndarray
  214. tz : tzinfo or None
  215. reso : NPY_DATETIMEUNIT
  216. Returns
  217. -------
  218. result : int64 ndarray of converted of normalized nanosecond timestamps
  219. """
  220. cdef:
  221. Localizer info = Localizer(tz, creso=reso)
  222. int64_t utc_val, local_val, res_val
  223. Py_ssize_t i, n = stamps.size
  224. Py_ssize_t pos = -1 # unused, avoid not-initialized warning
  225. ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
  226. cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)
  227. int64_t ppd = periods_per_day(reso)
  228. for i in range(n):
  229. # Analogous to: utc_val = stamps[i]
  230. utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
  231. if utc_val == NPY_NAT:
  232. res_val = NPY_NAT
  233. else:
  234. local_val = info.utc_val_to_local_val(utc_val, &pos)
  235. res_val = local_val - (local_val % ppd)
  236. # Analogous to: result[i] = res_val
  237. (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val
  238. cnp.PyArray_MultiIter_NEXT(mi)
  239. return result
  240. @cython.wraparound(False)
  241. @cython.boundscheck(False)
  242. def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -> bool:
  243. # stamps is int64_t, arbitrary ndim
  244. """
  245. Check if all of the given (nanosecond) timestamps are normalized to
  246. midnight, i.e. hour == minute == second == 0. If the optional timezone
  247. `tz` is not None, then this is midnight for this timezone.
  248. Parameters
  249. ----------
  250. stamps : int64 ndarray
  251. tz : tzinfo or None
  252. reso : NPY_DATETIMEUNIT
  253. Returns
  254. -------
  255. is_normalized : bool True if all stamps are normalized
  256. """
  257. cdef:
  258. Localizer info = Localizer(tz, creso=reso)
  259. int64_t utc_val, local_val
  260. Py_ssize_t i, n = stamps.size
  261. Py_ssize_t pos = -1 # unused, avoid not-initialized warning
  262. cnp.flatiter it = cnp.PyArray_IterNew(stamps)
  263. int64_t ppd = periods_per_day(reso)
  264. for i in range(n):
  265. # Analogous to: utc_val = stamps[i]
  266. utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))
  267. local_val = info.utc_val_to_local_val(utc_val, &pos)
  268. if local_val % ppd != 0:
  269. return False
  270. cnp.PyArray_ITER_NEXT(it)
  271. return True
  272. # -------------------------------------------------------------------------
  273. @cython.wraparound(False)
  274. @cython.boundscheck(False)
  275. def dt64arr_to_periodarr(
  276. ndarray stamps, int freq, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns
  277. ):
  278. # stamps is int64_t, arbitrary ndim
  279. cdef:
  280. Localizer info = Localizer(tz, creso=reso)
  281. Py_ssize_t i, n = stamps.size
  282. Py_ssize_t pos = -1 # unused, avoid not-initialized warning
  283. int64_t utc_val, local_val, res_val
  284. npy_datetimestruct dts
  285. ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
  286. cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)
  287. for i in range(n):
  288. # Analogous to: utc_val = stamps[i]
  289. utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
  290. if utc_val == NPY_NAT:
  291. res_val = NPY_NAT
  292. else:
  293. local_val = info.utc_val_to_local_val(utc_val, &pos)
  294. pandas_datetime_to_datetimestruct(local_val, reso, &dts)
  295. res_val = get_period_ordinal(&dts, freq)
  296. # Analogous to: result[i] = res_val
  297. (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val
  298. cnp.PyArray_MultiIter_NEXT(mi)
  299. return result