123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379 |
- cimport cython
- from cpython.datetime cimport (
- date,
- datetime,
- time,
- tzinfo,
- )
- import numpy as np
- cimport numpy as cnp
- from numpy cimport (
- int64_t,
- ndarray,
- )
- cnp.import_array()
- from .dtypes import Resolution
- from .dtypes cimport (
- c_Resolution,
- periods_per_day,
- )
- from .nattype cimport (
- NPY_NAT,
- c_NaT as NaT,
- )
- from .np_datetime cimport (
- NPY_DATETIMEUNIT,
- NPY_FR_ns,
- npy_datetimestruct,
- pandas_datetime_to_datetimestruct,
- )
- from .period cimport get_period_ordinal
- from .timestamps cimport create_timestamp_from_ts
- from .timezones cimport is_utc
- from .tzconversion cimport Localizer
@cython.boundscheck(False)
@cython.wraparound(False)
def tz_convert_from_utc(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns):
    # stamps is int64_t, arbitrary ndim
    """
    Translate UTC-based i8 values into the local i8 values for ``tz``.

    Parameters
    ----------
    stamps : ndarray[int64]
        Input values, any number of dimensions.
    tz : tzinfo or None
        Target timezone. When it is None or UTC the values are returned
        unchanged (as a copy).
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
        Unit of ``stamps``; forwarded to the Localizer.

    Returns
    -------
    ndarray[int64]
        New array shaped like ``stamps``. NPY_NAT entries are passed through.
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        Py_ssize_t pos  # out-parameter of utc_val_to_local_val; never read here
        int64_t utc_val, local_val

        ndarray result
        cnp.broadcast mi

    if tz is None or is_utc(tz) or n == 0:
        # Nothing to shift: a plain copy is much faster than the
        # element-by-element pattern below
        return stamps.copy()

    result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
    # Iterator slot 0 is the output array, slot 1 is the input array
    mi = cnp.PyArray_MultiIterNew2(result, stamps)

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

        if utc_val != NPY_NAT:
            local_val = info.utc_val_to_local_val(utc_val, &pos)
        else:
            # Missing values stay missing
            local_val = NPY_NAT

        # Analogous to: result[idx] = local_val
        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = local_val

        cnp.PyArray_MultiIter_NEXT(mi)

    return result
- # -------------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
def ints_to_pydatetime(
    ndarray stamps,
    tzinfo tz=None,
    str box="datetime",
    NPY_DATETIMEUNIT reso=NPY_FR_ns,
) -> np.ndarray:
    # stamps is int64, arbitrary ndim
    """
    Box every i8 value as a datetime, date, time or Timestamp object.

    Parameters
    ----------
    stamps : ndarray[int64]
        Input i8 values, any number of dimensions.
    tz : tzinfo or None, default None
        Timezone the values are converted to before boxing.
    box : {'datetime', 'timestamp', 'date', 'time'}, default 'datetime'
        * 'datetime' : box as datetime.datetime
        * 'timestamp' : box as pandas.Timestamp
        * 'date' : box as datetime.date (``tz`` must be None)
        * 'time' : box as datetime.time
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
        Unit of ``stamps``.

    Returns
    -------
    ndarray[object]
        Array shaped like ``stamps`` holding the boxed objects; NPY_NAT
        entries are boxed as NaT.

    Raises
    ------
    ValueError
        If ``box`` is not one of the supported names.
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        # Filled in by utc_val_to_local_val; only read on the pytz path
        Py_ssize_t pos = -1
        bint fold = 0
        int64_t utc_val, local_val
        npy_datetimestruct dts
        tzinfo elem_tz
        object boxed
        bint as_pydt = False, as_ts = False, as_date = False

        # `result` is allocated C-ordered, so its flattened view `res_flat`
        # is C-ordered too, and the flat iterator `it` over `stamps` walks
        # in C order as well: flat position idx therefore refers to the same
        # element in both.
        # See discussion at
        # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
        ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_OBJECT, 0)
        object[::1] res_flat = result.ravel()  # should NOT be a copy
        cnp.flatiter it = cnp.PyArray_IterNew(stamps)

    # Resolve the requested box kind once, outside the hot loop.
    # "time" needs no flag: it is the fall-through case in the loop.
    if box == "datetime":
        as_pydt = True
    elif box == "timestamp":
        as_ts = True
    elif box == "date":
        assert tz is None, "tz should be None when converting to date"
        as_date = True
    elif box != "time":
        raise ValueError(
            "box must be one of 'datetime', 'date', 'time' or 'timestamp'"
        )

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]

        if utc_val == NPY_NAT:
            boxed = <object>NaT
        else:
            local_val = info.utc_val_to_local_val(utc_val, &pos, &fold)

            elem_tz = tz
            if info.use_pytz:
                # find right representation of dst etc in pytz timezone
                elem_tz = tz._tzinfos[tz._transition_info[pos]]

            pandas_datetime_to_datetimestruct(local_val, reso, &dts)

            if as_pydt:
                boxed = datetime(
                    dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
                    elem_tz, fold=fold,
                )
            elif as_ts:
                boxed = create_timestamp_from_ts(
                    utc_val, dts, elem_tz, fold, reso=reso
                )
            elif as_date:
                boxed = date(dts.year, dts.month, dts.day)
            else:
                boxed = time(dts.hour, dts.min, dts.sec, dts.us, elem_tz, fold=fold)

        # The output is written through the flat C-contiguous view rather
        # than through an iterator data pointer; the pointer-based pattern
        # used for int64 outputs does not seem to work with object dtype.
        # See discussion at
        # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
        res_flat[idx] = boxed

        cnp.PyArray_ITER_NEXT(it)

    return result
- # -------------------------------------------------------------------------
cdef c_Resolution _reso_stamp(npy_datetimestruct *dts):
    """
    Classify a single datetimestruct by its finest non-zero field.

    Fields are checked from picoseconds upwards; the first non-zero one
    decides the result. A struct whose ps, us, sec, min and hour fields are
    all zero is reported as RESO_DAY.
    """
    if dts.ps != 0:
        return c_Resolution.RESO_NS

    if dts.us != 0:
        # Whole multiples of 1000 microseconds are millisecond-resolution
        if dts.us % 1000 != 0:
            return c_Resolution.RESO_US
        return c_Resolution.RESO_MS

    if dts.sec != 0:
        return c_Resolution.RESO_SEC

    if dts.min != 0:
        return c_Resolution.RESO_MIN

    if dts.hour != 0:
        return c_Resolution.RESO_HR

    return c_Resolution.RESO_DAY
@cython.wraparound(False)
@cython.boundscheck(False)
def get_resolution(
    ndarray stamps, tzinfo tz=None, NPY_DATETIMEUNIT reso=NPY_FR_ns
) -> Resolution:
    # stamps is int64_t, any ndim
    """
    Determine the Resolution shared by all non-NaT values in ``stamps``.

    Each non-NaT value is converted to local time for ``tz``, classified
    with ``_reso_stamp``, and the smallest classification seen is kept.

    Parameters
    ----------
    stamps : ndarray[int64]
        Input i8 values, any number of dimensions.
    tz : tzinfo or None, default None
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
        Unit of ``stamps``.

    Returns
    -------
    Resolution
        ``Resolution(RESO_DAY)`` when ``stamps`` is empty or all NaT.
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        Py_ssize_t pos = -1  # unused, avoid not-initialized warning
        int64_t utc_val, local_val
        npy_datetimestruct dts
        # Running minimum over the per-element classifications
        c_Resolution lowest = c_Resolution.RESO_DAY
        c_Resolution current
        cnp.flatiter it = cnp.PyArray_IterNew(stamps)

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))

        # NaT carries no resolution information, so it is skipped
        if utc_val != NPY_NAT:
            local_val = info.utc_val_to_local_val(utc_val, &pos)

            pandas_datetime_to_datetimestruct(local_val, reso, &dts)
            current = _reso_stamp(&dts)
            if current < lowest:
                lowest = current

        cnp.PyArray_ITER_NEXT(it)

    return Resolution(lowest)
- # -------------------------------------------------------------------------
@cython.cdivision(False)
@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso):
    # stamps is int64_t, arbitrary ndim
    """
    Round each timestamp down to the start of its day (midnight) in ``tz``.

    Every non-NaT value is first converted to local time for ``tz`` and then
    floored to a whole multiple of ``periods_per_day(reso)``.

    Parameters
    ----------
    stamps : int64 ndarray
        Input i8 values in units of ``reso``, any number of dimensions.
    tz : tzinfo or None
    reso : NPY_DATETIMEUNIT

    Returns
    -------
    result : int64 ndarray
        Normalized local values, shaped like ``stamps``. NPY_NAT entries
        are passed through.
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        Py_ssize_t pos = -1  # unused, avoid not-initialized warning
        int64_t utc_val, local_val, res_val

        ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
        # Iterator slot 0 is the output array, slot 1 is the input array
        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)
        int64_t ppd = periods_per_day(reso)

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

        if utc_val != NPY_NAT:
            local_val = info.utc_val_to_local_val(utc_val, &pos)
            # cdivision is off, so `%` follows Python semantics and this
            # rounds down (not toward zero) for negative values as well
            res_val = local_val - (local_val % ppd)
        else:
            res_val = NPY_NAT

        # Analogous to: result[idx] = res_val
        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = res_val

        cnp.PyArray_MultiIter_NEXT(mi)

    return result
@cython.wraparound(False)
@cython.boundscheck(False)
def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) -> bool:
    # stamps is int64_t, arbitrary ndim
    """
    Report whether every timestamp falls exactly on midnight in ``tz``.

    A value counts as normalized when, after conversion to local time for
    ``tz``, it is a whole multiple of ``periods_per_day(reso)``. NPY_NAT is
    not special-cased: it goes through the same check as any other value.

    Parameters
    ----------
    stamps : int64 ndarray
        Input i8 values in units of ``reso``, any number of dimensions.
    tz : tzinfo or None
    reso : NPY_DATETIMEUNIT

    Returns
    -------
    is_normalized : bool
        True if all stamps are normalized (also True for an empty array).
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        Py_ssize_t pos = -1  # unused, avoid not-initialized warning
        int64_t utc_val, local_val
        cnp.flatiter it = cnp.PyArray_IterNew(stamps)
        int64_t ppd = periods_per_day(reso)

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = cnp.PyArray_GETITEM(stamps, cnp.PyArray_ITER_DATA(it))
        local_val = info.utc_val_to_local_val(utc_val, &pos)

        # Any remainder means a non-zero time-of-day: stop at the first one
        if (local_val % ppd) != 0:
            return False

        cnp.PyArray_ITER_NEXT(it)

    return True
- # -------------------------------------------------------------------------
@cython.wraparound(False)
@cython.boundscheck(False)
def dt64arr_to_periodarr(
    ndarray stamps, int freq, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_FR_ns
):
    # stamps is int64_t, arbitrary ndim
    """
    Convert i8 datetime values to period ordinals for ``freq``.

    Each non-NaT value is converted to local time for ``tz``, broken into a
    datetimestruct, and handed to ``get_period_ordinal``.

    Parameters
    ----------
    stamps : ndarray[int64]
        Input i8 values, any number of dimensions.
    freq : int
        Frequency code forwarded to ``get_period_ordinal``.
    tz : tzinfo or None
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
        Unit of ``stamps``.

    Returns
    -------
    ndarray[int64]
        Period ordinals, shaped like ``stamps``. NPY_NAT entries are passed
        through.
    """
    cdef:
        Localizer info = Localizer(tz, creso=reso)
        Py_ssize_t n = stamps.size
        Py_ssize_t idx
        Py_ssize_t pos = -1  # unused, avoid not-initialized warning
        int64_t utc_val, local_val, ordinal
        npy_datetimestruct dts

        ndarray result = cnp.PyArray_EMPTY(stamps.ndim, stamps.shape, cnp.NPY_INT64, 0)
        # Iterator slot 0 is the output array, slot 1 is the input array
        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, stamps)

    for idx in range(n):
        # Analogous to: utc_val = stamps[idx]
        utc_val = (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

        if utc_val != NPY_NAT:
            local_val = info.utc_val_to_local_val(utc_val, &pos)
            pandas_datetime_to_datetimestruct(local_val, reso, &dts)
            ordinal = get_period_ordinal(&dts, freq)
        else:
            ordinal = NPY_NAT

        # Analogous to: result[idx] = ordinal
        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ordinal

        cnp.PyArray_MultiIter_NEXT(mi)

    return result
|