timedeltas.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. """
  2. timedelta support tools
  3. """
  4. from __future__ import annotations
  5. from datetime import timedelta
  6. from typing import (
  7. TYPE_CHECKING,
  8. overload,
  9. )
  10. import numpy as np
  11. from pandas._libs import lib
  12. from pandas._libs.tslibs import (
  13. NaT,
  14. NaTType,
  15. )
  16. from pandas._libs.tslibs.timedeltas import (
  17. Timedelta,
  18. parse_timedelta_unit,
  19. )
  20. from pandas.core.dtypes.common import is_list_like
  21. from pandas.core.dtypes.generic import (
  22. ABCIndex,
  23. ABCSeries,
  24. )
  25. from pandas.core.arrays.timedeltas import sequence_to_td64ns
  26. if TYPE_CHECKING:
  27. from pandas._libs.tslibs.timedeltas import UnitChoices
  28. from pandas._typing import (
  29. ArrayLike,
  30. DateTimeErrorChoices,
  31. )
  32. from pandas import (
  33. Index,
  34. Series,
  35. TimedeltaIndex,
  36. )
  37. @overload
  38. def to_timedelta(
  39. arg: str | float | timedelta,
  40. unit: UnitChoices | None = ...,
  41. errors: DateTimeErrorChoices = ...,
  42. ) -> Timedelta:
  43. ...
  44. @overload
  45. def to_timedelta(
  46. arg: Series,
  47. unit: UnitChoices | None = ...,
  48. errors: DateTimeErrorChoices = ...,
  49. ) -> Series:
  50. ...
  51. @overload
  52. def to_timedelta(
  53. arg: list | tuple | range | ArrayLike | Index,
  54. unit: UnitChoices | None = ...,
  55. errors: DateTimeErrorChoices = ...,
  56. ) -> TimedeltaIndex:
  57. ...
  58. def to_timedelta(
  59. arg: str
  60. | int
  61. | float
  62. | timedelta
  63. | list
  64. | tuple
  65. | range
  66. | ArrayLike
  67. | Index
  68. | Series,
  69. unit: UnitChoices | None = None,
  70. errors: DateTimeErrorChoices = "raise",
  71. ) -> Timedelta | TimedeltaIndex | Series:
  72. """
  73. Convert argument to timedelta.
  74. Timedeltas are absolute differences in times, expressed in difference
  75. units (e.g. days, hours, minutes, seconds). This method converts
  76. an argument from a recognized timedelta format / value into
  77. a Timedelta type.
  78. Parameters
  79. ----------
  80. arg : str, timedelta, list-like or Series
  81. The data to be converted to timedelta.
  82. .. versionchanged:: 2.0
  83. Strings with units 'M', 'Y' and 'y' do not represent
  84. unambiguous timedelta values and will raise an exception.
  85. unit : str, optional
  86. Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``.
  87. Possible values:
  88. * 'W'
  89. * 'D' / 'days' / 'day'
  90. * 'hours' / 'hour' / 'hr' / 'h'
  91. * 'm' / 'minute' / 'min' / 'minutes' / 'T'
  92. * 'S' / 'seconds' / 'sec' / 'second'
  93. * 'ms' / 'milliseconds' / 'millisecond' / 'milli' / 'millis' / 'L'
  94. * 'us' / 'microseconds' / 'microsecond' / 'micro' / 'micros' / 'U'
  95. * 'ns' / 'nanoseconds' / 'nano' / 'nanos' / 'nanosecond' / 'N'
  96. .. versionchanged:: 1.1.0
  97. Must not be specified when `arg` context strings and
  98. ``errors="raise"``.
  99. errors : {'ignore', 'raise', 'coerce'}, default 'raise'
  100. - If 'raise', then invalid parsing will raise an exception.
  101. - If 'coerce', then invalid parsing will be set as NaT.
  102. - If 'ignore', then invalid parsing will return the input.
  103. Returns
  104. -------
  105. timedelta
  106. If parsing succeeded.
  107. Return type depends on input:
  108. - list-like: TimedeltaIndex of timedelta64 dtype
  109. - Series: Series of timedelta64 dtype
  110. - scalar: Timedelta
  111. See Also
  112. --------
  113. DataFrame.astype : Cast argument to a specified dtype.
  114. to_datetime : Convert argument to datetime.
  115. convert_dtypes : Convert dtypes.
  116. Notes
  117. -----
  118. If the precision is higher than nanoseconds, the precision of the duration is
  119. truncated to nanoseconds for string inputs.
  120. Examples
  121. --------
  122. Parsing a single string to a Timedelta:
  123. >>> pd.to_timedelta('1 days 06:05:01.00003')
  124. Timedelta('1 days 06:05:01.000030')
  125. >>> pd.to_timedelta('15.5us')
  126. Timedelta('0 days 00:00:00.000015500')
  127. Parsing a list or array of strings:
  128. >>> pd.to_timedelta(['1 days 06:05:01.00003', '15.5us', 'nan'])
  129. TimedeltaIndex(['1 days 06:05:01.000030', '0 days 00:00:00.000015500', NaT],
  130. dtype='timedelta64[ns]', freq=None)
  131. Converting numbers by specifying the `unit` keyword argument:
  132. >>> pd.to_timedelta(np.arange(5), unit='s')
  133. TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02',
  134. '0 days 00:00:03', '0 days 00:00:04'],
  135. dtype='timedelta64[ns]', freq=None)
  136. >>> pd.to_timedelta(np.arange(5), unit='d')
  137. TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'],
  138. dtype='timedelta64[ns]', freq=None)
  139. """
  140. if unit is not None:
  141. unit = parse_timedelta_unit(unit)
  142. if errors not in ("ignore", "raise", "coerce"):
  143. raise ValueError("errors must be one of 'ignore', 'raise', or 'coerce'.")
  144. if unit in {"Y", "y", "M"}:
  145. raise ValueError(
  146. "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
  147. "represent unambiguous timedelta values durations."
  148. )
  149. if arg is None:
  150. return arg
  151. elif isinstance(arg, ABCSeries):
  152. values = _convert_listlike(arg._values, unit=unit, errors=errors)
  153. return arg._constructor(values, index=arg.index, name=arg.name)
  154. elif isinstance(arg, ABCIndex):
  155. return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name)
  156. elif isinstance(arg, np.ndarray) and arg.ndim == 0:
  157. # extract array scalar and process below
  158. # error: Incompatible types in assignment (expression has type "object",
  159. # variable has type "Union[str, int, float, timedelta, List[Any],
  160. # Tuple[Any, ...], Union[Union[ExtensionArray, ndarray[Any, Any]], Index,
  161. # Series]]") [assignment]
  162. arg = lib.item_from_zerodim(arg) # type: ignore[assignment]
  163. elif is_list_like(arg) and getattr(arg, "ndim", 1) == 1:
  164. return _convert_listlike(arg, unit=unit, errors=errors)
  165. elif getattr(arg, "ndim", 1) > 1:
  166. raise TypeError(
  167. "arg must be a string, timedelta, list, tuple, 1-d array, or Series"
  168. )
  169. if isinstance(arg, str) and unit is not None:
  170. raise ValueError("unit must not be specified if the input is/contains a str")
  171. # ...so it must be a scalar value. Return scalar.
  172. return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors)
  173. def _coerce_scalar_to_timedelta_type(
  174. r, unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise"
  175. ):
  176. """Convert string 'r' to a timedelta object."""
  177. result: Timedelta | NaTType
  178. try:
  179. result = Timedelta(r, unit)
  180. except ValueError:
  181. if errors == "raise":
  182. raise
  183. if errors == "ignore":
  184. return r
  185. # coerce
  186. result = NaT
  187. return result
  188. def _convert_listlike(
  189. arg, unit=None, errors: DateTimeErrorChoices = "raise", name=None
  190. ):
  191. """Convert a list of objects to a timedelta index object."""
  192. if isinstance(arg, (list, tuple)) or not hasattr(arg, "dtype"):
  193. # This is needed only to ensure that in the case where we end up
  194. # returning arg (errors == "ignore"), and where the input is a
  195. # generator, we return a useful list-like instead of a
  196. # used-up generator
  197. if not hasattr(arg, "__array__"):
  198. arg = list(arg)
  199. arg = np.array(arg, dtype=object)
  200. try:
  201. td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
  202. except ValueError:
  203. if errors == "ignore":
  204. return arg
  205. else:
  206. # This else-block accounts for the cases when errors='raise'
  207. # and errors='coerce'. If errors == 'raise', these errors
  208. # should be raised. If errors == 'coerce', we shouldn't
  209. # expect any errors to be raised, since all parsing errors
  210. # cause coercion to pd.NaT. However, if an error / bug is
  211. # introduced that causes an Exception to be raised, we would
  212. # like to surface it.
  213. raise
  214. from pandas import TimedeltaIndex
  215. value = TimedeltaIndex(td64arr, unit="ns", name=name)
  216. return value