timedeltas.py
from __future__ import annotations

from datetime import timedelta
import operator
from typing import (
    TYPE_CHECKING,
    Iterator,
    cast,
)
import warnings

import numpy as np

from pandas._libs import (
    lib,
    tslibs,
)
from pandas._libs.tslibs import (
    BaseOffset,
    NaT,
    NaTType,
    Tick,
    Timedelta,
    astype_overflowsafe,
    get_supported_reso,
    get_unit_from_dtype,
    iNaT,
    is_supported_unit,
    npy_unit_to_abbrev,
    periods_per_second,
    to_offset,
)
from pandas._libs.tslibs.conversion import precision_from_unit
from pandas._libs.tslibs.fields import (
    get_timedelta_days,
    get_timedelta_field,
)
from pandas._libs.tslibs.timedeltas import (
    array_to_timedelta64,
    floordiv_object_array,
    ints_to_pytimedelta,
    parse_timedelta_unit,
    truediv_object_array,
)
from pandas._typing import (
    AxisInt,
    DateTimeErrorChoices,
    DtypeObj,
    NpDtype,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.util._validators import validate_endpoints

from pandas.core.dtypes.common import (
    TD64NS_DTYPE,
    is_dtype_equal,
    is_extension_array_dtype,
    is_float_dtype,
    is_integer_dtype,
    is_object_dtype,
    is_scalar,
    is_string_dtype,
    is_timedelta64_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.missing import isna

from pandas.core import nanops
from pandas.core.array_algos import datetimelike_accumulations
from pandas.core.arrays import datetimelike as dtl
from pandas.core.arrays._ranges import generate_regular_range
import pandas.core.common as com
from pandas.core.ops import roperator
from pandas.core.ops.common import unpack_zerodim_and_defer

if TYPE_CHECKING:
    from pandas import DataFrame


def _field_accessor(name: str, alias: str, docstring: str):
    def f(self) -> np.ndarray:
        values = self.asi8
        if alias == "days":
            result = get_timedelta_days(values, reso=self._creso)
        else:
            # error: Incompatible types in assignment (
            # expression has type "ndarray[Any, dtype[signedinteger[_32Bit]]]",
            # variable has type "ndarray[Any, dtype[signedinteger[_64Bit]]]
            result = get_timedelta_field(values, alias, reso=self._creso)  # type: ignore[assignment] # noqa: E501
        if self._hasna:
            result = self._maybe_mask_results(
                result, fill_value=None, convert="float64"
            )
        return result

    f.__name__ = name
    f.__doc__ = f"\n{docstring}\n"
    return property(f)
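
# Usage sketch (illustrative, not part of the upstream module): this factory backs
# the class-level field properties defined further down, e.g.
# ``seconds = _field_accessor("seconds", "seconds", ...)``. When the array holds
# missing values, the integer result is masked to float64:
#
#   >>> tda = pd.array(pd.to_timedelta(["1 days 00:00:03", pd.NaT]))
#   >>> tda.seconds
#   array([ 3., nan])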
class TimedeltaArray(dtl.TimelikeOps):
    """
    Pandas ExtensionArray for timedelta data.

    .. warning::

       TimedeltaArray is currently experimental, and its API may change
       without warning. In particular, :attr:`TimedeltaArray.dtype` is
       expected to change to be an instance of an ``ExtensionDtype``
       subclass.

    Parameters
    ----------
    values : array-like
        The timedelta data.

    dtype : numpy.dtype
        Currently, only ``numpy.dtype("timedelta64[ns]")`` is accepted.
    freq : Offset, optional
    copy : bool, default False
        Whether to copy the underlying array of data.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    _typ = "timedeltaarray"
    _internal_fill_value = np.timedelta64("NaT", "ns")
    _recognized_scalars = (timedelta, np.timedelta64, Tick)
    _is_recognized_dtype = is_timedelta64_dtype
    _infer_matches = ("timedelta", "timedelta64")

    @property
    def _scalar_type(self) -> type[Timedelta]:
        return Timedelta

    __array_priority__ = 1000
    # define my properties & methods for delegation
    _other_ops: list[str] = []
    _bool_ops: list[str] = []
    _object_ops: list[str] = ["freq"]
    _field_ops: list[str] = ["days", "seconds", "microseconds", "nanoseconds"]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + ["unit"]
    _datetimelike_methods: list[str] = [
        "to_pytimedelta",
        "total_seconds",
        "round",
        "floor",
        "ceil",
        "as_unit",
    ]

    # Note: ndim must be defined to ensure NaT.__richcmp__(TimedeltaArray)
    # operates pointwise.

    def _box_func(self, x: np.timedelta64) -> Timedelta | NaTType:
        y = x.view("i8")
        if y == NaT._value:
            return NaT
        return Timedelta._from_value_and_reso(y, reso=self._creso)

    @property
    # error: Return type "dtype" of "dtype" incompatible with return type
    # "ExtensionDtype" in supertype "ExtensionArray"
    def dtype(self) -> np.dtype:  # type: ignore[override]
        """
        The dtype for the TimedeltaArray.

        .. warning::

           A future version of pandas will change dtype to be an instance
           of a :class:`pandas.api.extensions.ExtensionDtype` subclass,
           not a ``numpy.dtype``.

        Returns
        -------
        numpy.dtype
        """
        return self._ndarray.dtype

    # ----------------------------------------------------------------
    # Constructors

    _freq = None
    _default_dtype = TD64NS_DTYPE  # used in TimeLikeOps.__init__

    @classmethod
    def _validate_dtype(cls, values, dtype):
        # used in TimeLikeOps.__init__
        _validate_td64_dtype(values.dtype)
        dtype = _validate_td64_dtype(dtype)
        return dtype

    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls, values: np.ndarray, freq: BaseOffset | None = None, dtype=TD64NS_DTYPE
    ) -> TimedeltaArray:
        # Require td64 dtype, not unit-less, matching values.dtype
        assert isinstance(dtype, np.dtype) and dtype.kind == "m"
        assert not tslibs.is_unitless(dtype)
        assert isinstance(values, np.ndarray), type(values)
        assert dtype == values.dtype

        result = super()._simple_new(values=values, dtype=dtype)
        result._freq = freq
        return result

    @classmethod
    def _from_sequence(cls, data, *, dtype=None, copy: bool = False) -> TimedeltaArray:
        if dtype:
            dtype = _validate_td64_dtype(dtype)

        data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
        freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)

        if dtype is not None:
            data = astype_overflowsafe(data, dtype=dtype, copy=False)

        return cls._simple_new(data, dtype=data.dtype, freq=freq)

    @classmethod
    def _from_sequence_not_strict(
        cls,
        data,
        *,
        dtype=None,
        copy: bool = False,
        freq=lib.no_default,
        unit=None,
    ) -> TimedeltaArray:
        """
        A non-strict version of _from_sequence, called from TimedeltaIndex.__new__.
        """
        if dtype:
            dtype = _validate_td64_dtype(dtype)

        assert unit not in ["Y", "y", "M"]  # caller is responsible for checking

        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None

        freq, freq_infer = dtl.maybe_infer_freq(freq)

        data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=unit)
        freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
        if explicit_none:
            freq = None

        if dtype is not None:
            data = astype_overflowsafe(data, dtype=dtype, copy=False)

        result = cls._simple_new(data, dtype=data.dtype, freq=freq)

        if inferred_freq is None and freq is not None:
            # this condition precludes `freq_infer`
            cls._validate_frequency(result, freq)

        elif freq_infer:
            # Set _freq directly to bypass duplicative _validate_frequency
            # check.
            result._freq = to_offset(result.inferred_freq)

        return result

    # Signature of "_generate_range" incompatible with supertype
    # "DatetimeLikeArrayMixin"
    @classmethod
    def _generate_range(  # type: ignore[override]
        cls, start, end, periods, freq, closed=None, *, unit: str | None = None
    ):
        periods = dtl.validate_periods(periods)
        if freq is None and any(x is None for x in [periods, start, end]):
            raise ValueError("Must provide freq argument if no data is supplied")

        if com.count_not_none(start, end, periods, freq) != 3:
            raise ValueError(
                "Of the four parameters: start, end, periods, "
                "and freq, exactly three must be specified"
            )

        if start is not None:
            start = Timedelta(start).as_unit("ns")

        if end is not None:
            end = Timedelta(end).as_unit("ns")

        if unit is not None:
            if unit not in ["s", "ms", "us", "ns"]:
                raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
        else:
            unit = "ns"

        if start is not None and unit is not None:
            start = start.as_unit(unit, round_ok=False)
        if end is not None and unit is not None:
            end = end.as_unit(unit, round_ok=False)

        left_closed, right_closed = validate_endpoints(closed)

        if freq is not None:
            index = generate_regular_range(start, end, periods, freq, unit=unit)
        else:
            index = np.linspace(start._value, end._value, periods).astype("i8")

        if not left_closed:
            index = index[1:]
        if not right_closed:
            index = index[:-1]

        td64values = index.view(f"m8[{unit}]")
        return cls._simple_new(td64values, dtype=td64values.dtype, freq=freq)
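
    # Usage sketch (illustrative, not part of the upstream module): this range
    # constructor is what ``pd.timedelta_range`` ultimately calls into, which is
    # why exactly three of start/end/periods/freq must be supplied here, e.g.
    #
    #   >>> pd.timedelta_range(start="1 day", periods=3, freq="12H")
    #   TimedeltaIndex(['1 days 00:00:00', '1 days 12:00:00', '2 days 00:00:00'],
    #                  dtype='timedelta64[ns]', freq='12H')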
    # ----------------------------------------------------------------
    # DatetimeLike Interface

    def _unbox_scalar(self, value) -> np.timedelta64:
        if not isinstance(value, self._scalar_type) and value is not NaT:
            raise ValueError("'value' should be a Timedelta.")
        self._check_compatible_with(value)
        if value is NaT:
            return np.timedelta64(value._value, self.unit)
        else:
            return value.as_unit(self.unit).asm8

    def _scalar_from_string(self, value) -> Timedelta | NaTType:
        return Timedelta(value)

    def _check_compatible_with(self, other) -> None:
        # we don't have anything to validate.
        pass

    # ----------------------------------------------------------------
    # Array-Like / EA-Interface Methods

    def astype(self, dtype, copy: bool = True):
        # We handle
        #   --> timedelta64[ns]
        #   --> timedelta64
        # DatetimeLikeArrayMixin super call handles other cases
        dtype = pandas_dtype(dtype)

        if isinstance(dtype, np.dtype) and dtype.kind == "m":
            if dtype == self.dtype:
                if copy:
                    return self.copy()
                return self

            if is_supported_unit(get_unit_from_dtype(dtype)):
                # unit conversion e.g. timedelta64[s]
                res_values = astype_overflowsafe(self._ndarray, dtype, copy=False)
                return type(self)._simple_new(
                    res_values, dtype=res_values.dtype, freq=self.freq
                )
            else:
                raise ValueError(
                    f"Cannot convert from {self.dtype} to {dtype}. "
                    "Supported resolutions are 's', 'ms', 'us', 'ns'"
                )

        return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy=copy)
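
    # Usage sketch (illustrative, not part of the upstream module): the
    # timedelta64 branch above only converts between supported resolutions, e.g.
    #
    #   >>> tda = pd.array(pd.to_timedelta(["1 days", "2 days"]))
    #   >>> tda.astype("timedelta64[s]").dtype
    #   dtype('<m8[s]')
    #
    # while an unsupported resolution such as "timedelta64[D]" raises ValueError.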
    def __iter__(self) -> Iterator:
        if self.ndim > 1:
            for i in range(len(self)):
                yield self[i]
        else:
            # convert in chunks of 10k for efficiency
            data = self._ndarray
            length = len(self)
            chunksize = 10000
            chunks = (length // chunksize) + 1
            for i in range(chunks):
                start_i = i * chunksize
                end_i = min((i + 1) * chunksize, length)
                converted = ints_to_pytimedelta(data[start_i:end_i], box=True)
                yield from converted

    # ----------------------------------------------------------------
    # Reductions

    def sum(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        initial=None,
        skipna: bool = True,
        min_count: int = 0,
    ):
        nv.validate_sum(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims, "initial": initial}
        )

        result = nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def std(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
        )

        result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        if axis is None or self.ndim == 1:
            return self._box_func(result)
        return self._from_backing_data(result)

    # ----------------------------------------------------------------
    # Accumulations

    def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
        if name == "cumsum":
            op = getattr(datetimelike_accumulations, name)
            result = op(self._ndarray.copy(), skipna=skipna, **kwargs)

            return type(self)._simple_new(result, freq=None, dtype=self.dtype)
        elif name == "cumprod":
            raise TypeError("cumprod not supported for Timedelta.")

        else:
            return super()._accumulate(name, skipna=skipna, **kwargs)

    # ----------------------------------------------------------------
    # Rendering Methods

    def _formatter(self, boxed: bool = False):
        from pandas.io.formats.format import get_format_timedelta64

        return get_format_timedelta64(self, box=True)

    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        from pandas.io.formats.format import get_format_timedelta64

        # Relies on Timedelta._repr_base
        formatter = get_format_timedelta64(self._ndarray, na_rep)
        # equiv: np.array([formatter(x) for x in self._ndarray])
        # but independent of dimension
        return np.frompyfunc(formatter, 1, 1)(self._ndarray)

    # ----------------------------------------------------------------
    # Arithmetic Methods

    def _add_offset(self, other):
        assert not isinstance(other, Tick)
        raise TypeError(
            f"cannot add the type {type(other).__name__} to a {type(self).__name__}"
        )

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other) -> TimedeltaArray:
        if is_scalar(other):
            # numpy will accept float and int, raise TypeError for others
            result = self._ndarray * other
            freq = None
            if self.freq is not None and not isna(other):
                freq = self.freq * other
            return type(self)._simple_new(result, dtype=result.dtype, freq=freq)

        if not hasattr(other, "dtype"):
            # list, tuple
            other = np.array(other)
        if len(other) != len(self) and not is_timedelta64_dtype(other.dtype):
            # Exclude timedelta64 here so we correctly raise TypeError
            # for that instead of ValueError
            raise ValueError("Cannot multiply with unequal lengths")

        if is_object_dtype(other.dtype):
            # this multiplication will succeed only if all elements of other
            # are int or float scalars, so we will end up with
            # timedelta64[ns]-dtyped result
            arr = self._ndarray
            result = [arr[n] * other[n] for n in range(len(self))]
            result = np.array(result)
            return type(self)._simple_new(result, dtype=result.dtype)

        # numpy will accept float or int dtype, raise TypeError for others
        result = self._ndarray * other
        return type(self)._simple_new(result, dtype=result.dtype)

    __rmul__ = __mul__
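
    # Usage sketch (illustrative, not part of the upstream module): with a scalar,
    # the multiplication above also scales any freq, e.g.
    #
    #   >>> tdi = pd.timedelta_range("1 hour", periods=3, freq="H")
    #   >>> (tdi * 2).freq
    #   <2 * Hours>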
    def _scalar_divlike_op(self, other, op):
        """
        Shared logic for __truediv__, __rtruediv__, __floordiv__, __rfloordiv__
        with scalar 'other'.
        """
        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)
            # mypy assumes that __new__ returns an instance of the class
            # github.com/python/mypy/issues/1020
            if cast("Timedelta | NaTType", other) is NaT:
                # specifically timedelta64-NaT
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # otherwise, dispatch to Timedelta implementation
            return op(self._ndarray, other)

        else:
            # caller is responsible for checking lib.is_scalar(other)
            # assume other is numeric, otherwise numpy will raise

            if op in [roperator.rtruediv, roperator.rfloordiv]:
                raise TypeError(
                    f"Cannot divide {type(other).__name__} by {type(self).__name__}"
                )

            result = op(self._ndarray, other)
            freq = None

            if self.freq is not None:
                # Note: freq gets division, not floor-division, even if op
                # is floordiv.
                freq = self.freq / other

                # TODO: 2022-12-24 test_ufunc_coercions, test_tdi_ops_attributes
                # get here for truediv, no tests for floordiv

                if op is operator.floordiv:
                    if freq.nanos == 0 and self.freq.nanos != 0:
                        # e.g. if self.freq is Nano(1) then dividing by 2
                        # rounds down to zero
                        # TODO: 2022-12-24 should implement the same check
                        # for truediv case
                        freq = None

            return type(self)._simple_new(result, dtype=result.dtype, freq=freq)
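
    # Usage sketch (illustrative, not part of the upstream module): the NaT branch
    # above is why dividing by a timedelta-NaT yields a plain float ndarray of
    # NaNs rather than a TimedeltaArray, e.g.
    #
    #   >>> tda = pd.array(pd.to_timedelta(["1 days", "2 days"]))
    #   >>> tda / np.timedelta64("NaT")
    #   array([nan, nan])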
    def _cast_divlike_op(self, other):
        if not hasattr(other, "dtype"):
            # e.g. list, tuple
            other = np.array(other)

        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")
        return other

    def _vector_divlike_op(self, other, op) -> np.ndarray | TimedeltaArray:
        """
        Shared logic for __truediv__, __floordiv__, and their reversed versions
        with timedelta64-dtype ndarray other.
        """
        # Let numpy handle it
        result = op(self._ndarray, np.asarray(other))

        if (is_integer_dtype(other.dtype) or is_float_dtype(other.dtype)) and op in [
            operator.truediv,
            operator.floordiv,
        ]:
            return type(self)._simple_new(result, dtype=result.dtype)

        if op in [operator.floordiv, roperator.rfloordiv]:
            mask = self.isna() | isna(other)
            if mask.any():
                result = result.astype(np.float64)
                np.putmask(result, mask, np.nan)

        return result

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        # timedelta / X is well-defined for timedelta-like or numeric X
        op = operator.truediv
        if is_scalar(other):
            return self._scalar_divlike_op(other, op)

        other = self._cast_divlike_op(other)
        if (
            is_timedelta64_dtype(other.dtype)
            or is_integer_dtype(other.dtype)
            or is_float_dtype(other.dtype)
        ):
            return self._vector_divlike_op(other, op)

        if is_object_dtype(other.dtype):
            other = np.asarray(other)
            if self.ndim > 1:
                res_cols = [left / right for left, right in zip(self, other)]
                res_cols2 = [x.reshape(1, -1) for x in res_cols]
                result = np.concatenate(res_cols2, axis=0)
            else:
                result = truediv_object_array(self._ndarray, other)

            return result

        else:
            return NotImplemented

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        # X / timedelta is defined only for timedelta-like X
        op = roperator.rtruediv
        if is_scalar(other):
            return self._scalar_divlike_op(other, op)

        other = self._cast_divlike_op(other)
        if is_timedelta64_dtype(other.dtype):
            return self._vector_divlike_op(other, op)

        elif is_object_dtype(other.dtype):
            # Note: unlike in __truediv__, we do not _need_ to do type
            # inference on the result. It does not raise, a numeric array
            # is returned. GH#23829
            result_list = [other[n] / self[n] for n in range(len(self))]
            return np.array(result_list)

        else:
            return NotImplemented

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        op = operator.floordiv
        if is_scalar(other):
            return self._scalar_divlike_op(other, op)

        other = self._cast_divlike_op(other)
        if (
            is_timedelta64_dtype(other.dtype)
            or is_integer_dtype(other.dtype)
            or is_float_dtype(other.dtype)
        ):
            return self._vector_divlike_op(other, op)

        elif is_object_dtype(other.dtype):
            other = np.asarray(other)
            if self.ndim > 1:
                res_cols = [left // right for left, right in zip(self, other)]
                res_cols2 = [x.reshape(1, -1) for x in res_cols]
                result = np.concatenate(res_cols2, axis=0)
            else:
                result = floordiv_object_array(self._ndarray, other)

            assert result.dtype == object
            return result

        else:
            return NotImplemented

    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        op = roperator.rfloordiv
        if is_scalar(other):
            return self._scalar_divlike_op(other, op)

        other = self._cast_divlike_op(other)
        if is_timedelta64_dtype(other.dtype):
            return self._vector_divlike_op(other, op)

        elif is_object_dtype(other.dtype):
            result_list = [other[n] // self[n] for n in range(len(self))]
            result = np.array(result_list)
            return result

        else:
            return NotImplemented

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        # Note: This is a naive implementation, can likely be optimized
        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)
        return self - (self // other) * other

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        # Note: This is a naive implementation, can likely be optimized
        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)
        return other - (other // self) * self

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        # Note: This is a naive implementation, can likely be optimized
        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)

        res1 = self // other
        res2 = self - res1 * other
        return res1, res2

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        # Note: This is a naive implementation, can likely be optimized
        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)

        res1 = other // self
        res2 = other - res1 * self
        return res1, res2
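
    # Worked example (illustrative, not part of the upstream module): the
    # mod/divmod family above reduces to floor-division plus a remainder,
    # e.g. elementwise Timedelta("5 min") // Timedelta("2 min") == 2 and
    # Timedelta("5 min") % Timedelta("2 min") == Timedelta("1 min"),
    # since 5 min - 2 * (2 min) == 1 min.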
    def __neg__(self) -> TimedeltaArray:
        freq = None
        if self.freq is not None:
            freq = -self.freq
        return type(self)._simple_new(-self._ndarray, dtype=self.dtype, freq=freq)

    def __pos__(self) -> TimedeltaArray:
        return type(self)(self._ndarray.copy(), freq=self.freq)

    def __abs__(self) -> TimedeltaArray:
        # Note: freq is not preserved
        return type(self)(np.abs(self._ndarray))

    # ----------------------------------------------------------------
    # Conversion Methods - Vectorized analogues of Timedelta methods

    def total_seconds(self) -> npt.NDArray[np.float64]:
        """
        Return total duration of each element expressed in seconds.

        This method is available directly on TimedeltaArray, TimedeltaIndex
        and on Series containing timedelta values under the ``.dt`` namespace.

        Returns
        -------
        ndarray, Index or Series
            When the calling object is a TimedeltaArray, the return type
            is ndarray. When the calling object is a TimedeltaIndex,
            the return type is an Index with a float64 dtype. When the calling object
            is a Series, the return type is Series of type `float64` whose
            index is the same as the original.

        See Also
        --------
        datetime.timedelta.total_seconds : Standard library version
            of this method.
        TimedeltaIndex.components : Return a DataFrame with components of
            each Timedelta.

        Examples
        --------
        **Series**

        >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='d'))
        >>> s
        0   0 days
        1   1 days
        2   2 days
        3   3 days
        4   4 days
        dtype: timedelta64[ns]

        >>> s.dt.total_seconds()
        0         0.0
        1     86400.0
        2    172800.0
        3    259200.0
        4    345600.0
        dtype: float64

        **TimedeltaIndex**

        >>> idx = pd.to_timedelta(np.arange(5), unit='d')
        >>> idx
        TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
                       dtype='timedelta64[ns]', freq=None)

        >>> idx.total_seconds()
        Index([0.0, 86400.0, 172800.0, 259200.0, 345600.0], dtype='float64')
        """
        pps = periods_per_second(self._creso)
        return self._maybe_mask_results(self.asi8 / pps, fill_value=None)

    def to_pytimedelta(self) -> npt.NDArray[np.object_]:
        """
        Return an ndarray of datetime.timedelta objects.

        Returns
        -------
        numpy.ndarray
        """
        return ints_to_pytimedelta(self._ndarray)

    days = _field_accessor("days", "days", "Number of days for each element.")
    seconds = _field_accessor(
        "seconds",
        "seconds",
        "Number of seconds (>= 0 and less than 1 day) for each element.",
    )
    microseconds = _field_accessor(
        "microseconds",
        "microseconds",
        "Number of microseconds (>= 0 and less than 1 second) for each element.",
    )
    nanoseconds = _field_accessor(
        "nanoseconds",
        "nanoseconds",
        "Number of nanoseconds (>= 0 and less than 1 microsecond) for each element.",
    )

    @property
    def components(self) -> DataFrame:
        """
        Return a DataFrame of the individual resolution components of the Timedeltas.

        The components (days, hours, minutes, seconds, milliseconds, microseconds,
        nanoseconds) are returned as columns in a DataFrame.

        Returns
        -------
        DataFrame
        """
        from pandas import DataFrame

        columns = [
            "days",
            "hours",
            "minutes",
            "seconds",
            "milliseconds",
            "microseconds",
            "nanoseconds",
        ]
        hasnans = self._hasna
        if hasnans:

            def f(x):
                if isna(x):
                    return [np.nan] * len(columns)
                return x.components

        else:

            def f(x):
                return x.components

        result = DataFrame([f(x) for x in self], columns=columns)
        if not hasnans:
            result = result.astype("int64")
        return result
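
    # Usage sketch (illustrative, not part of the upstream module): .components
    # splits each value into integer columns, e.g.
    #
    #   >>> pd.to_timedelta(["1 days 00:03:00"]).components
    #      days  hours  minutes  seconds  milliseconds  microseconds  nanoseconds
    #   0     1      0        3        0             0             0            0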
# ---------------------------------------------------------------------
# Constructor Helpers


def sequence_to_td64ns(
    data,
    copy: bool = False,
    unit=None,
    errors: DateTimeErrorChoices = "raise",
) -> tuple[np.ndarray, Tick | None]:
    """
    Parameters
    ----------
    data : list-like
    copy : bool, default False
    unit : str, optional
        The timedelta unit to treat integers as multiples of. For numeric
        data this defaults to ``'ns'``.
        Must be un-specified if the data contains a str and ``errors=="raise"``.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    converted : numpy.ndarray
        The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting ``errors=ignore`` will not cause
    errors to be ignored; they are caught and subsequently ignored at a
    higher level.
    """
    assert unit not in ["Y", "y", "M"]  # caller is responsible for checking

    inferred_freq = None
    if unit is not None:
        unit = parse_timedelta_unit(unit)

    data, copy = dtl.ensure_arraylike_for_datetimelike(
        data, copy, cls_name="TimedeltaArray"
    )

    if isinstance(data, TimedeltaArray):
        inferred_freq = data.freq

    # Convert whatever we have into timedelta64[ns] dtype
    if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
        # no need to make a copy, need to convert if string-dtyped
        data = _objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(data.dtype):
        # treat as multiples of the given unit
        data, copy_made = _ints_to_td64ns(data, unit=unit)
        copy = copy and not copy_made

    elif is_float_dtype(data.dtype):
        # cast the unit, multiply base/frac separately
        # to avoid precision issues from float -> int
        if is_extension_array_dtype(data):
            mask = data._mask
            data = data._data
        else:
            mask = np.isnan(data)

        # The next few lines are effectively a vectorized 'cast_from_unit'
        m, p = precision_from_unit(unit or "ns")
        with warnings.catch_warnings():
            # Suppress RuntimeWarning about All-NaN slice
            warnings.filterwarnings(
                "ignore", "invalid value encountered in cast", RuntimeWarning
            )
            base = data.astype(np.int64)
            frac = data - base
            if p:
                frac = np.round(frac, p)

        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", "invalid value encountered in cast", RuntimeWarning
            )
            data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")

        data[mask] = iNaT
        copy = False

    elif is_timedelta64_dtype(data.dtype):
        data_unit = get_unit_from_dtype(data.dtype)
        if not is_supported_unit(data_unit):
            # cast to closest supported unit, i.e. s or ns
            new_reso = get_supported_reso(data_unit)
            new_unit = npy_unit_to_abbrev(new_reso)
            new_dtype = np.dtype(f"m8[{new_unit}]")
            data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
            copy = False

    else:
        # This includes datetime64-dtype, see GH#23539, GH#29794
        raise TypeError(f"dtype {data.dtype} cannot be converted to timedelta64[ns]")

    data = np.array(data, copy=copy)

    assert data.dtype.kind == "m"
    assert data.dtype != "m8"  # i.e. not unit-less

    return data, inferred_freq
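
# Usage sketch (illustrative, not part of the upstream module): integer input is
# treated as multiples of ``unit`` and converted to nanosecond resolution, e.g.
#
#   >>> arr, freq = sequence_to_td64ns([1, 2, 3], unit="s")
#   >>> arr
#   array([1000000000, 2000000000, 3000000000], dtype='timedelta64[ns]')
#   >>> freq is None
#   True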
def _ints_to_td64ns(data, unit: str = "ns"):
    """
    Convert an ndarray with integer-dtype to timedelta64[ns] dtype, treating
    the integers as multiples of the given timedelta unit.

    Parameters
    ----------
    data : numpy.ndarray with integer-dtype
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.

    Returns
    -------
    numpy.ndarray : timedelta64[ns] array converted from data
    bool : whether a copy was made
    """
    copy_made = False
    unit = unit if unit is not None else "ns"

    if data.dtype != np.int64:
        # converting to int64 makes a copy, so we can avoid
        # re-copying later
        data = data.astype(np.int64)
        copy_made = True

    if unit != "ns":
        dtype_str = f"timedelta64[{unit}]"
        data = data.view(dtype_str)

        data = astype_overflowsafe(data, dtype=TD64NS_DTYPE)

        # the astype conversion makes a copy, so we can avoid re-copying later
        copy_made = True

    else:
        data = data.view("timedelta64[ns]")

    return data, copy_made


def _objects_to_td64ns(data, unit=None, errors: DateTimeErrorChoices = "raise"):
    """
    Convert an object-dtyped or string-dtyped array into a
    timedelta64[ns]-dtyped array.

    Parameters
    ----------
    data : ndarray or Index
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.
        Must not be specified if the data contains a str.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    numpy.ndarray : timedelta64[ns] array converted from data

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting ``errors=ignore`` will not cause
    errors to be ignored; they are caught and subsequently ignored at a
    higher level.
    """
    # coerce Index to np.ndarray, converting string-dtype if necessary
    values = np.array(data, dtype=np.object_, copy=False)

    result = array_to_timedelta64(values, unit=unit, errors=errors)
    return result.view("timedelta64[ns]")


def _validate_td64_dtype(dtype) -> DtypeObj:
    dtype = pandas_dtype(dtype)
    if is_dtype_equal(dtype, np.dtype("timedelta64")):
        # no precision disallowed GH#24806
        msg = (
            "Passing in 'timedelta' dtype with no precision is not allowed. "
            "Please pass in 'timedelta64[ns]' instead."
        )
        raise ValueError(msg)

    if (
        not isinstance(dtype, np.dtype)
        or dtype.kind != "m"
        or not is_supported_unit(get_unit_from_dtype(dtype))
    ):
        raise ValueError(f"dtype {dtype} cannot be converted to timedelta64[ns]")

    return dtype
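
# Usage sketch (illustrative, not part of the upstream module): _validate_td64_dtype
# accepts only timedelta64 dtypes with a supported resolution, e.g.
#
#   >>> _validate_td64_dtype("m8[s]")
#   dtype('<m8[s]')
#   >>> _validate_td64_dtype("timedelta64")      # no precision -> ValueError
#   >>> _validate_td64_dtype("timedelta64[D]")   # unsupported unit -> ValueError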