# datetimelike.py (pandas.core.arrays)
# NOTE: a file-viewer line-number gutter was captured here during extraction;
# it was not source code and has been removed.
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. from functools import wraps
  7. import operator
  8. from typing import (
  9. TYPE_CHECKING,
  10. Any,
  11. Callable,
  12. Iterator,
  13. Literal,
  14. Sequence,
  15. TypeVar,
  16. Union,
  17. cast,
  18. final,
  19. overload,
  20. )
  21. import warnings
  22. import numpy as np
  23. from pandas._libs import (
  24. algos,
  25. lib,
  26. )
  27. from pandas._libs.arrays import NDArrayBacked
  28. from pandas._libs.tslibs import (
  29. BaseOffset,
  30. IncompatibleFrequency,
  31. NaT,
  32. NaTType,
  33. Period,
  34. Resolution,
  35. Tick,
  36. Timedelta,
  37. Timestamp,
  38. astype_overflowsafe,
  39. delta_to_nanoseconds,
  40. get_unit_from_dtype,
  41. iNaT,
  42. ints_to_pydatetime,
  43. ints_to_pytimedelta,
  44. to_offset,
  45. )
  46. from pandas._libs.tslibs.fields import (
  47. RoundTo,
  48. round_nsint64,
  49. )
  50. from pandas._libs.tslibs.np_datetime import compare_mismatched_resolutions
  51. from pandas._libs.tslibs.timestamps import integer_op_not_supported
  52. from pandas._typing import (
  53. ArrayLike,
  54. AxisInt,
  55. DatetimeLikeScalar,
  56. Dtype,
  57. DtypeObj,
  58. F,
  59. NpDtype,
  60. PositionalIndexer2D,
  61. PositionalIndexerTuple,
  62. ScalarIndexer,
  63. SequenceIndexer,
  64. TimeAmbiguous,
  65. TimeNonexistent,
  66. npt,
  67. )
  68. from pandas.compat.numpy import function as nv
  69. from pandas.errors import (
  70. AbstractMethodError,
  71. InvalidComparison,
  72. PerformanceWarning,
  73. )
  74. from pandas.util._decorators import (
  75. Appender,
  76. Substitution,
  77. cache_readonly,
  78. )
  79. from pandas.util._exceptions import find_stack_level
  80. from pandas.core.dtypes.common import (
  81. is_all_strings,
  82. is_categorical_dtype,
  83. is_datetime64_any_dtype,
  84. is_datetime64_dtype,
  85. is_datetime64tz_dtype,
  86. is_datetime_or_timedelta_dtype,
  87. is_dtype_equal,
  88. is_float_dtype,
  89. is_integer_dtype,
  90. is_list_like,
  91. is_object_dtype,
  92. is_period_dtype,
  93. is_string_dtype,
  94. is_timedelta64_dtype,
  95. pandas_dtype,
  96. )
  97. from pandas.core.dtypes.dtypes import (
  98. DatetimeTZDtype,
  99. ExtensionDtype,
  100. )
  101. from pandas.core.dtypes.generic import (
  102. ABCCategorical,
  103. ABCMultiIndex,
  104. )
  105. from pandas.core.dtypes.missing import (
  106. is_valid_na_for_dtype,
  107. isna,
  108. )
  109. from pandas.core import (
  110. algorithms,
  111. nanops,
  112. ops,
  113. )
  114. from pandas.core.algorithms import (
  115. checked_add_with_arr,
  116. isin,
  117. unique1d,
  118. )
  119. from pandas.core.array_algos import datetimelike_accumulations
  120. from pandas.core.arraylike import OpsMixin
  121. from pandas.core.arrays._mixins import (
  122. NDArrayBackedExtensionArray,
  123. ravel_compat,
  124. )
  125. from pandas.core.arrays.arrow.array import ArrowExtensionArray
  126. from pandas.core.arrays.base import ExtensionArray
  127. from pandas.core.arrays.integer import IntegerArray
  128. import pandas.core.common as com
  129. from pandas.core.construction import (
  130. array as pd_array,
  131. ensure_wrapped_if_datetimelike,
  132. extract_array,
  133. )
  134. from pandas.core.indexers import (
  135. check_array_indexer,
  136. check_setitem_lengths,
  137. )
  138. from pandas.core.ops.common import unpack_zerodim_and_defer
  139. from pandas.core.ops.invalid import (
  140. invalid_comparison,
  141. make_invalid_op,
  142. )
  143. from pandas.tseries import frequencies
  144. if TYPE_CHECKING:
  145. from pandas.core.arrays import (
  146. DatetimeArray,
  147. PeriodArray,
  148. TimedeltaArray,
  149. )
# Scalar returned by indexing into a datetimelike array:
# Period/Timestamp/Timedelta (per subclass), or NaT.
DTScalarOrNaT = Union[DatetimeLikeScalar, NaTType]

# TypeVar so mixin methods can declare "returns the same subclass as self".
DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")
def _period_dispatch(meth: F) -> F:
    """
    For PeriodArray methods, dispatch to DatetimeArray and re-wrap the results
    in PeriodArray.  We cannot use ._ndarray directly for the affected
    methods because the i8 data has different semantics on NaT values.
    """

    @wraps(meth)
    def new_meth(self, *args, **kwargs):
        if not is_period_dtype(self.dtype):
            # Not a PeriodArray: call through unchanged.
            return meth(self, *args, **kwargs)

        # Reinterpret the period ordinals as datetime64[ns] so that NaT
        # (stored as iNaT) is handled correctly by the underlying method.
        arr = self.view("M8[ns]")
        result = meth(arr, *args, **kwargs)
        if result is NaT:
            return NaT
        elif isinstance(result, Timestamp):
            # Scalar result: re-box the ordinal as a Period.
            return self._box_func(result._value)

        # Array result: view back to i8 ordinals and re-wrap as PeriodArray.
        res_i8 = result.view("i8")
        return self._from_backing_data(res_i8)

    return cast(F, new_meth)
class DatetimeLikeArrayMixin(OpsMixin, NDArrayBackedExtensionArray):
    """
    Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray

    Assumes that __new__/__init__ defines:
        _ndarray

    and that inheriting subclass implements:
        freq
    """

    # _infer_matches -> which infer_dtype strings are close enough to our own
    _infer_matches: tuple[str, ...]
    # Predicate: does an arbitrary dtype belong to this array subclass?
    _is_recognized_dtype: Callable[[DtypeObj], bool]
    # Scalar types coercible to this array's scalar type (set by subclass).
    _recognized_scalars: tuple[type, ...]
    # Backing ndarray (M8/m8/i8-compatible) supplied by subclass __init__.
    _ndarray: np.ndarray
    freq: BaseOffset | None
    @cache_readonly
    def _can_hold_na(self) -> bool:
        # All datetimelike arrays can represent missing values (as NaT).
        return True
    def __init__(
        self, data, dtype: Dtype | None = None, freq=None, copy: bool = False
    ) -> None:
        # Abstract: concrete subclasses (DatetimeArray, TimedeltaArray,
        # PeriodArray) provide the real constructor.
        raise AbstractMethodError(self)
    @property
    def _scalar_type(self) -> type[DatetimeLikeScalar]:
        """
        The scalar associated with this datelike

        * PeriodArray : Period
        * DatetimeArray : Timestamp
        * TimedeltaArray : Timedelta
        """
        # Abstract: each subclass returns its scalar class.
        raise AbstractMethodError(self)
    def _scalar_from_string(self, value: str) -> DTScalarOrNaT:
        """
        Construct a scalar type from a string.

        Parameters
        ----------
        value : str

        Returns
        -------
        Period, Timestamp, or Timedelta, or NaT
            Whatever the type of ``self._scalar_type`` is.

        Notes
        -----
        This should call ``self._check_compatible_with`` before
        unboxing the result.
        """
        # Abstract: parsing rules differ per subclass.
        raise AbstractMethodError(self)
    def _unbox_scalar(
        self, value: DTScalarOrNaT
    ) -> np.int64 | np.datetime64 | np.timedelta64:
        """
        Unbox the integer value of a scalar `value`.

        Parameters
        ----------
        value : Period, Timestamp, Timedelta, or NaT
            Depending on subclass.

        Returns
        -------
        int

        Examples
        --------
        >>> self._unbox_scalar(Timedelta("10s"))  # doctest: +SKIP
        10000000000
        """
        # Abstract: each subclass knows its own internal representation.
        raise AbstractMethodError(self)
    def _check_compatible_with(self, other: DTScalarOrNaT) -> None:
        """
        Verify that `self` and `other` are compatible.

        * DatetimeArray verifies that the timezones (if any) match
        * PeriodArray verifies that the freq matches
        * Timedelta has no verification

        In each case, NaT is considered compatible.

        Parameters
        ----------
        other

        Raises
        ------
        Exception
        """
        # Abstract: compatibility rules differ per subclass.
        raise AbstractMethodError(self)
    # ------------------------------------------------------------------

    def _box_func(self, x):
        """
        box function to get object from internal representation
        """
        # Abstract: e.g. DatetimeArray boxes i8 values into Timestamps.
        raise AbstractMethodError(self)
    def _box_values(self, values) -> np.ndarray:
        """
        apply box func to passed values
        """
        # Element-wise boxing; convert=False keeps the result as object dtype.
        return lib.map_infer(values, self._box_func, convert=False)
  261. def __iter__(self) -> Iterator:
  262. if self.ndim > 1:
  263. return (self[n] for n in range(len(self)))
  264. else:
  265. return (self._box_func(v) for v in self.asi8)
    @property
    def asi8(self) -> npt.NDArray[np.int64]:
        """
        Integer representation of the values.

        Returns
        -------
        ndarray
            An ndarray with int64 dtype.
        """
        # do not cache or you'll create a memory leak
        # (the view keeps self._ndarray alive and vice versa)
        return self._ndarray.view("i8")
    # ----------------------------------------------------------------
    # Rendering Methods

    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None
    ) -> npt.NDArray[np.object_]:
        """
        Helper method for astype when converting to strings.

        Returns
        -------
        ndarray[str]
        """
        # Abstract: string formatting differs per subclass.
        raise AbstractMethodError(self)
  289. def _formatter(self, boxed: bool = False):
  290. # TODO: Remove Datetime & DatetimeTZ formatters.
  291. return "'{}'".format
    # ----------------------------------------------------------------
    # Array-Like / EA-Interface Methods

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        # used for Timedelta/DatetimeArray, overwritten by PeriodArray
        if is_object_dtype(dtype):
            # Boxed scalars requested: materialize element by element.
            return np.array(list(self), dtype=object)
        # Otherwise hand back the backing ndarray (no copy).
        return self._ndarray
    @overload
    def __getitem__(self, item: ScalarIndexer) -> DTScalarOrNaT:
        ...

    @overload
    def __getitem__(
        self: DatetimeLikeArrayT,
        item: SequenceIndexer | PositionalIndexerTuple,
    ) -> DatetimeLikeArrayT:
        ...

    def __getitem__(
        self: DatetimeLikeArrayT, key: PositionalIndexer2D
    ) -> DatetimeLikeArrayT | DTScalarOrNaT:
        """
        This getitem defers to the underlying array, which by-definition can
        only handle list-likes, slices, and integer scalars
        """
        # Use cast as we know we will get back a DatetimeLikeArray or DTScalar,
        # but skip evaluating the Union at runtime for performance
        # (see https://github.com/pandas-dev/pandas/pull/44624)
        result = cast(
            "Union[DatetimeLikeArrayT, DTScalarOrNaT]", super().__getitem__(key)
        )
        if lib.is_scalar(result):
            return result
        else:
            # At this point we know the result is an array.
            result = cast(DatetimeLikeArrayT, result)
        # Array results must carry the freq implied by the indexer.
        result._freq = self._get_getitem_freq(key)
        return result
    def _get_getitem_freq(self, key) -> BaseOffset | None:
        """
        Find the `freq` attribute to assign to the result of a __getitem__ lookup.
        """
        is_period = is_period_dtype(self.dtype)
        if is_period:
            # Period dtype: freq is part of the dtype, always preserved.
            freq = self.freq
        elif self.ndim != 1:
            freq = None
        else:
            key = check_array_indexer(self, key)  # maybe ndarray[bool] -> slice
            freq = None
            if isinstance(key, slice):
                if self.freq is not None and key.step is not None:
                    # Stepped slice scales the freq by the step.
                    freq = key.step * self.freq
                else:
                    freq = self.freq
            elif key is Ellipsis:
                # GH#21282 indexing with Ellipsis is similar to a full slice,
                #  should preserve `freq` attribute
                freq = self.freq
            elif com.is_bool_indexer(key):
                # A boolean mask equivalent to a slice can still preserve freq.
                new_key = lib.maybe_booleans_to_slice(key.view(np.uint8))
                if isinstance(new_key, slice):
                    return self._get_getitem_freq(new_key)
        return freq
    # error: Argument 1 of "__setitem__" is incompatible with supertype
    # "ExtensionArray"; supertype defines the argument type as "Union[int,
    # ndarray]"
    def __setitem__(
        self,
        key: int | Sequence[int] | Sequence[bool] | slice,
        value: NaTType | Any | Sequence[Any],
    ) -> None:
        # I'm fudging the types a bit here. "Any" above really depends
        # on type(self). For PeriodArray, it's Period (or stuff coercible
        # to a period in from_sequence). For DatetimeArray, it's Timestamp...
        # I don't know if mypy can do that, possibly with Generics.
        # https://mypy.readthedocs.io/en/latest/generics.html
        no_op = check_setitem_lengths(key, value, self)

        # Calling super() before the no_op short-circuit means that we raise
        # on invalid 'value' even if this is a no-op, e.g. wrong-dtype empty array.
        super().__setitem__(key, value)

        if no_op:
            return

        # Mutation may have broken any regular spacing; drop cached freq.
        self._maybe_clear_freq()
    def _maybe_clear_freq(self) -> None:
        # inplace operations like __setitem__ may invalidate the freq of
        # DatetimeArray and TimedeltaArray
        # Base-class no-op; mutable subclasses override to reset ``_freq``.
        pass
    def astype(self, dtype, copy: bool = True):
        """
        Cast to the requested dtype; base-class handling of the dtype
        families common to all datetimelike arrays.
        """
        # Some notes on cases we don't have to handle here in the base class:
        # 1. PeriodArray.astype handles period -> period
        # 2. DatetimeArray.astype handles conversion between tz.
        # 3. DatetimeArray.astype handles datetime -> period
        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            if self.dtype.kind == "M":
                self = cast("DatetimeArray", self)
                # *much* faster than self._box_values
                #  for e.g. test_get_loc_tuple_monotonic_above_size_cutoff
                i8data = self.asi8
                converted = ints_to_pydatetime(
                    i8data,
                    tz=self.tz,
                    box="timestamp",
                    reso=self._creso,
                )
                return converted

            elif self.dtype.kind == "m":
                return ints_to_pytimedelta(self._ndarray, box=True)

            # PeriodArray and other cases: box element-wise.
            return self._box_values(self.asi8.ravel()).reshape(self.shape)

        elif isinstance(dtype, ExtensionDtype):
            return super().astype(dtype, copy=copy)
        elif is_string_dtype(dtype):
            return self._format_native_types()
        elif is_integer_dtype(dtype):
            # we deliberately ignore int32 vs. int64 here.
            # See https://github.com/pandas-dev/pandas/issues/24381 for more.
            values = self.asi8

            if dtype != np.int64:
                raise TypeError(
                    f"Converting from {self.dtype} to {dtype} is not supported. "
                    "Do obj.astype('int64').astype(dtype) instead"
                )

            if copy:
                values = values.copy()
            return values
        elif (
            is_datetime_or_timedelta_dtype(dtype)
            and not is_dtype_equal(self.dtype, dtype)
        ) or is_float_dtype(dtype):
            # disallow conversion between datetime/timedelta,
            # and conversions for any datetimelike to float
            msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
            raise TypeError(msg)
        else:
            return np.asarray(self, dtype=dtype)
    @overload
    def view(self: DatetimeLikeArrayT) -> DatetimeLikeArrayT:
        ...

    @overload
    def view(self, dtype: Literal["M8[ns]"]) -> DatetimeArray:
        ...

    @overload
    def view(self, dtype: Literal["m8[ns]"]) -> TimedeltaArray:
        ...

    @overload
    def view(self, dtype: Dtype | None = ...) -> ArrayLike:
        ...

    # pylint: disable-next=useless-parent-delegation
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        # we need to explicitly call super() method as long as the `@overload`s
        #  are present in this file.
        return super().view(dtype)
    # ------------------------------------------------------------------
    # ExtensionArray Interface

    @classmethod
    def _concat_same_type(
        cls: type[DatetimeLikeArrayT],
        to_concat: Sequence[DatetimeLikeArrayT],
        axis: AxisInt = 0,
    ) -> DatetimeLikeArrayT:
        new_obj = super()._concat_same_type(to_concat, axis)

        obj = to_concat[0]
        dtype = obj.dtype

        new_freq = None
        if is_period_dtype(dtype):
            # Period freq is part of the dtype, so it always survives concat.
            new_freq = obj.freq
        elif axis == 0:
            # GH 3232: If the concat result is evenly spaced, we can retain the
            # original frequency
            to_concat = [x for x in to_concat if len(x)]

            if obj.freq is not None and all(x.freq == obj.freq for x in to_concat):
                # Adjacent pieces must also line up end-to-start at that freq.
                pairs = zip(to_concat[:-1], to_concat[1:])
                if all(pair[0][-1] + obj.freq == pair[1][0] for pair in pairs):
                    new_freq = obj.freq

        new_obj._freq = new_freq
        return new_obj
  467. def copy(self: DatetimeLikeArrayT, order: str = "C") -> DatetimeLikeArrayT:
  468. # error: Unexpected keyword argument "order" for "copy"
  469. new_obj = super().copy(order=order) # type: ignore[call-arg]
  470. new_obj._freq = self.freq
  471. return new_obj
    # ------------------------------------------------------------------
    # Validation Methods
    # TODO: try to de-duplicate these, ensure identical behavior

    def _validate_comparison_value(self, other):
        """
        Coerce ``other`` for comparison with self, raising InvalidComparison
        when the comparison should uniformly return NotImplemented/False.
        """
        if isinstance(other, str):
            try:
                # GH#18435 strings get a pass from tzawareness compat
                other = self._scalar_from_string(other)
            except (ValueError, IncompatibleFrequency):
                # failed to parse as Timestamp/Timedelta/Period
                raise InvalidComparison(other)

        if isinstance(other, self._recognized_scalars) or other is NaT:
            other = self._scalar_type(other)
            try:
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                # e.g. tzawareness mismatch
                raise InvalidComparison(other) from err

        elif not is_list_like(other):
            raise InvalidComparison(other)

        elif len(other) != len(self):
            raise ValueError("Lengths must match")

        else:
            try:
                other = self._validate_listlike(other, allow_object=True)
                self._check_compatible_with(other)
            except (TypeError, IncompatibleFrequency) as err:
                if is_object_dtype(getattr(other, "dtype", None)):
                    # We will have to operate element-wise
                    pass
                else:
                    raise InvalidComparison(other) from err

        return other
    def _validate_scalar(
        self,
        value,
        *,
        allow_listlike: bool = False,
        unbox: bool = True,
    ):
        """
        Validate that the input value can be cast to our scalar_type.

        Parameters
        ----------
        value : object
        allow_listlike: bool, default False
            When raising an exception, whether the message should say
            listlike inputs are allowed.
        unbox : bool, default True
            Whether to unbox the result before returning.  Note: unbox=False
            skips the setitem compatibility check.

        Returns
        -------
        self._scalar_type or NaT
        """
        if isinstance(value, self._scalar_type):
            pass

        elif isinstance(value, str):
            # NB: Careful about tzawareness
            try:
                value = self._scalar_from_string(value)
            except ValueError as err:
                msg = self._validation_error_message(value, allow_listlike)
                raise TypeError(msg) from err

        elif is_valid_na_for_dtype(value, self.dtype):
            # GH#18295
            value = NaT

        elif isna(value):
            # if we are dt64tz and value is dt64("NaT"), dont cast to NaT,
            #  or else we'll fail to raise in _unbox_scalar
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        elif isinstance(value, self._recognized_scalars):
            # e.g. a stdlib datetime for DatetimeArray: box to our scalar.
            value = self._scalar_type(value)

        else:
            msg = self._validation_error_message(value, allow_listlike)
            raise TypeError(msg)

        if not unbox:
            # NB: In general NDArrayBackedExtensionArray will unbox here;
            #  this option exists to prevent a performance hit in
            #  TimedeltaIndex.get_loc
            return value
        return self._unbox_scalar(value)
  555. def _validation_error_message(self, value, allow_listlike: bool = False) -> str:
  556. """
  557. Construct an exception message on validation error.
  558. Some methods allow only scalar inputs, while others allow either scalar
  559. or listlike.
  560. Parameters
  561. ----------
  562. allow_listlike: bool, default False
  563. Returns
  564. -------
  565. str
  566. """
  567. if allow_listlike:
  568. msg = (
  569. f"value should be a '{self._scalar_type.__name__}', 'NaT', "
  570. f"or array of those. Got '{type(value).__name__}' instead."
  571. )
  572. else:
  573. msg = (
  574. f"value should be a '{self._scalar_type.__name__}' or 'NaT'. "
  575. f"Got '{type(value).__name__}' instead."
  576. )
  577. return msg
    def _validate_listlike(self, value, allow_object: bool = False):
        """
        Coerce a list-like ``value`` to an array compatible with self,
        raising TypeError (with a listlike-aware message) when impossible.
        """
        if isinstance(value, type(self)):
            return value

        if isinstance(value, list) and len(value) == 0:
            # We treat empty list as our own dtype.
            return type(self)._from_sequence([], dtype=self.dtype)

        if hasattr(value, "dtype") and value.dtype == object:
            # `array` below won't do inference if value is an Index or Series.
            #  so do so here.  in the Index case, inferred_type may be cached.
            if lib.infer_dtype(value) in self._infer_matches:
                try:
                    value = type(self)._from_sequence(value)
                except (ValueError, TypeError):
                    if allow_object:
                        return value
                    msg = self._validation_error_message(value, True)
                    raise TypeError(msg)

        # Do type inference if necessary up front (after unpacking PandasArray)
        # e.g. we passed PeriodIndex.values and got an ndarray of Periods
        value = extract_array(value, extract_numpy=True)
        value = pd_array(value)
        value = extract_array(value, extract_numpy=True)

        if is_all_strings(value):
            # We got a StringArray
            try:
                # TODO: Could use from_sequence_of_strings if implemented
                # Note: passing dtype is necessary for PeriodArray tests
                value = type(self)._from_sequence(value, dtype=self.dtype)
            except ValueError:
                pass

        if is_categorical_dtype(value.dtype):
            # e.g. we have a Categorical holding self.dtype
            if is_dtype_equal(value.categories.dtype, self.dtype):
                # TODO: do we need equal dtype or just comparable?
                value = value._internal_get_values()
                value = extract_array(value, extract_numpy=True)

        if allow_object and is_object_dtype(value.dtype):
            pass

        elif not type(self)._is_recognized_dtype(value.dtype):
            msg = self._validation_error_message(value, True)
            raise TypeError(msg)

        return value
  620. def _validate_setitem_value(self, value):
  621. if is_list_like(value):
  622. value = self._validate_listlike(value)
  623. else:
  624. return self._validate_scalar(value, allow_listlike=True)
  625. return self._unbox(value)
    @final
    def _unbox(self, other) -> np.int64 | np.datetime64 | np.timedelta64 | np.ndarray:
        """
        Unbox either a scalar with _unbox_scalar or an instance of our own type.
        """
        if lib.is_scalar(other):
            other = self._unbox_scalar(other)
        else:
            # same type as self
            self._check_compatible_with(other)
            other = other._ndarray
        return other
    # ------------------------------------------------------------------
    # Additional array methods
    #  These are not part of the EA API, but we implement them because
    #  pandas assumes they're there.

    @ravel_compat
    def map(self, mapper):
        # TODO(GH-23179): Add ExtensionArray.map
        # Need to figure out if we want ExtensionArray.map first.
        # If so, then we can refactor IndexOpsMixin._map_values to
        # a standalone function and call from here..
        # Else, just rewrite _map_infer_values to do the right thing.
        from pandas import Index

        # Delegate to Index.map and unwrap back to an array.
        return Index(self).map(mapper).array
    def isin(self, values) -> npt.NDArray[np.bool_]:
        """
        Compute boolean array of whether each value is found in the
        passed set of values.

        Parameters
        ----------
        values : set or sequence of values

        Returns
        -------
        ndarray[bool]
        """
        if not hasattr(values, "dtype"):
            values = np.asarray(values)

        if values.dtype.kind in ["f", "i", "u", "c"]:
            # Numeric values can never match datetimelike entries.
            # TODO: de-duplicate with equals, validate_comparison_value
            return np.zeros(self.shape, dtype=bool)

        if not isinstance(values, type(self)):
            inferable = [
                "timedelta",
                "timedelta64",
                "datetime",
                "datetime64",
                "date",
                "period",
            ]
            if values.dtype == object:
                inferred = lib.infer_dtype(values, skipna=False)
                if inferred not in inferable:
                    if inferred == "string":
                        # Strings may still parse; fall through to coercion.
                        pass

                    elif "mixed" in inferred:
                        # Mixed objects: compare element-wise via object dtype.
                        return isin(self.astype(object), values)
                    else:
                        return np.zeros(self.shape, dtype=bool)
            try:
                values = type(self)._from_sequence(values)
            except ValueError:
                # Could not coerce; fall back to object-dtype comparison.
                return isin(self.astype(object), values)

        if self.dtype.kind in ["m", "M"]:
            self = cast("DatetimeArray | TimedeltaArray", self)
            # Align resolutions so the i8 comparison below is meaningful.
            values = values.as_unit(self.unit)

        try:
            self._check_compatible_with(values)
        except (TypeError, ValueError):
            # Includes tzawareness mismatch and IncompatibleFrequencyError
            return np.zeros(self.shape, dtype=bool)

        # Compatible arrays: compare on the raw i8 representations.
        return isin(self.asi8, values.asi8)
  698. # ------------------------------------------------------------------
  699. # Null Handling
def isna(self) -> npt.NDArray[np.bool_]:
    """
    Boolean ndarray marking which positions hold NaT.
    """
    return self._isnan
@property  # NB: override with cache_readonly in immutable subclasses
def _isnan(self) -> npt.NDArray[np.bool_]:
    """
    return if each value is nan
    """
    # NaT is represented as iNaT in the underlying int64 values.
    return self.asi8 == iNaT
  708. @property # NB: override with cache_readonly in immutable subclasses
  709. def _hasna(self) -> bool:
  710. """
  711. return if I have any nans; enables various perf speedups
  712. """
  713. return bool(self._isnan.any())
  714. def _maybe_mask_results(
  715. self, result: np.ndarray, fill_value=iNaT, convert=None
  716. ) -> np.ndarray:
  717. """
  718. Parameters
  719. ----------
  720. result : np.ndarray
  721. fill_value : object, default iNaT
  722. convert : str, dtype or None
  723. Returns
  724. -------
  725. result : ndarray with values replace by the fill_value
  726. mask the result if needed, convert to the provided dtype if its not
  727. None
  728. This is an internal routine.
  729. """
  730. if self._hasna:
  731. if convert:
  732. result = result.astype(convert)
  733. if fill_value is None:
  734. fill_value = np.nan
  735. np.putmask(result, self._isnan, fill_value)
  736. return result
  737. # ------------------------------------------------------------------
  738. # Frequency Properties/Methods
  739. @property
  740. def freqstr(self) -> str | None:
  741. """
  742. Return the frequency object as a string if its set, otherwise None.
  743. """
  744. if self.freq is None:
  745. return None
  746. return self.freq.freqstr
  747. @property # NB: override with cache_readonly in immutable subclasses
  748. def inferred_freq(self) -> str | None:
  749. """
  750. Tries to return a string representing a frequency generated by infer_freq.
  751. Returns None if it can't autodetect the frequency.
  752. """
  753. if self.ndim != 1:
  754. return None
  755. try:
  756. return frequencies.infer_freq(self)
  757. except ValueError:
  758. return None
  759. @property # NB: override with cache_readonly in immutable subclasses
  760. def _resolution_obj(self) -> Resolution | None:
  761. freqstr = self.freqstr
  762. if freqstr is None:
  763. return None
  764. try:
  765. return Resolution.get_reso_from_freqstr(freqstr)
  766. except KeyError:
  767. return None
@property  # NB: override with cache_readonly in immutable subclasses
def resolution(self) -> str:
    """
    Returns day, hour, minute, second, millisecond or microsecond
    """
    # NOTE(review): raises AttributeError when _resolution_obj is None
    # (no recognizable freq) — callers appear to rely on a freq being set.
    # error: Item "None" of "Optional[Any]" has no attribute "attrname"
    return self._resolution_obj.attrname  # type: ignore[union-attr]
# monotonicity/uniqueness properties are called via frequencies.infer_freq,
# see GH#23789

@property
def _is_monotonic_increasing(self) -> bool:
    # is_monotonic returns a tuple; element 0 is the increasing flag.
    return algos.is_monotonic(self.asi8, timelike=True)[0]

@property
def _is_monotonic_decreasing(self) -> bool:
    # element 1 of the is_monotonic tuple is the decreasing flag.
    return algos.is_monotonic(self.asi8, timelike=True)[1]
  783. @property
  784. def _is_unique(self) -> bool:
  785. return len(unique1d(self.asi8.ravel("K"))) == self.size
  786. # ------------------------------------------------------------------
  787. # Arithmetic Methods
def _cmp_method(self, other, op):
    """
    Shared comparison implementation (==, !=, <, ...) for the
    datetimelike arrays; NaT entries always compare False except for !=.
    """
    if self.ndim > 1 and getattr(other, "shape", None) == self.shape:
        # TODO: handle 2D-like listlikes
        return op(self.ravel(), other.ravel()).reshape(self.shape)

    try:
        other = self._validate_comparison_value(other)
    except InvalidComparison:
        return invalid_comparison(self, other, op)

    dtype = getattr(other, "dtype", None)
    if is_object_dtype(dtype):
        # We have to use comp_method_OBJECT_ARRAY instead of numpy
        # comparison otherwise it would fail to raise when
        # comparing tz-aware and tz-naive
        with np.errstate(all="ignore"):
            result = ops.comp_method_OBJECT_ARRAY(
                op, np.asarray(self.astype(object)), other
            )
        return result

    if other is NaT:
        # NaT != everything; NaT ==/</> nothing.
        if op is operator.ne:
            result = np.ones(self.shape, dtype=bool)
        else:
            result = np.zeros(self.shape, dtype=bool)
        return result

    if not is_period_dtype(self.dtype):
        self = cast(TimelikeOps, self)
        if self._creso != other._creso:
            # Mismatched resolutions (e.g. s vs ns) need special handling
            # before we can compare raw i8 values.
            if not isinstance(other, type(self)):
                # i.e. Timedelta/Timestamp, cast to ndarray and let
                # compare_mismatched_resolutions handle broadcasting
                try:
                    # GH#52080 see if we can losslessly cast to shared unit
                    other = other.as_unit(self.unit, round_ok=False)
                except ValueError:
                    other_arr = np.array(other.asm8)
                    return compare_mismatched_resolutions(
                        self._ndarray, other_arr, op
                    )
            else:
                other_arr = other._ndarray
                return compare_mismatched_resolutions(self._ndarray, other_arr, op)

    other_vals = self._unbox(other)
    # GH#37462 comparison on i8 values is almost 2x faster than M8/m8
    result = op(self._ndarray.view("i8"), other_vals.view("i8"))

    # Positions where either side is NaT get the NaT comparison result.
    o_mask = isna(other)
    mask = self._isnan | o_mask
    if mask.any():
        nat_result = op is operator.ne
        np.putmask(result, mask, nat_result)

    return result
# pow is invalid for all three subclasses; TimedeltaArray will override
# the multiplication and division ops
# Each assignment installs an implementation that rejects the operation
# (see make_invalid_op).
__pow__ = make_invalid_op("__pow__")
__rpow__ = make_invalid_op("__rpow__")
__mul__ = make_invalid_op("__mul__")
__rmul__ = make_invalid_op("__rmul__")
__truediv__ = make_invalid_op("__truediv__")
__rtruediv__ = make_invalid_op("__rtruediv__")
__floordiv__ = make_invalid_op("__floordiv__")
__rfloordiv__ = make_invalid_op("__rfloordiv__")
__mod__ = make_invalid_op("__mod__")
__rmod__ = make_invalid_op("__rmod__")
__divmod__ = make_invalid_op("__divmod__")
__rdivmod__ = make_invalid_op("__rdivmod__")
  852. @final
  853. def _get_i8_values_and_mask(
  854. self, other
  855. ) -> tuple[int | npt.NDArray[np.int64], None | npt.NDArray[np.bool_]]:
  856. """
  857. Get the int64 values and b_mask to pass to checked_add_with_arr.
  858. """
  859. if isinstance(other, Period):
  860. i8values = other.ordinal
  861. mask = None
  862. elif isinstance(other, (Timestamp, Timedelta)):
  863. i8values = other._value
  864. mask = None
  865. else:
  866. # PeriodArray, DatetimeArray, TimedeltaArray
  867. mask = other._isnan
  868. i8values = other.asi8
  869. return i8values, mask
  870. @final
  871. def _get_arithmetic_result_freq(self, other) -> BaseOffset | None:
  872. """
  873. Check if we can preserve self.freq in addition or subtraction.
  874. """
  875. # Adding or subtracting a Timedelta/Timestamp scalar is freq-preserving
  876. # whenever self.freq is a Tick
  877. if is_period_dtype(self.dtype):
  878. return self.freq
  879. elif not lib.is_scalar(other):
  880. return None
  881. elif isinstance(self.freq, Tick):
  882. # In these cases
  883. return self.freq
  884. return None
  885. @final
  886. def _add_datetimelike_scalar(self, other) -> DatetimeArray:
  887. if not is_timedelta64_dtype(self.dtype):
  888. raise TypeError(
  889. f"cannot add {type(self).__name__} and {type(other).__name__}"
  890. )
  891. self = cast("TimedeltaArray", self)
  892. from pandas.core.arrays import DatetimeArray
  893. from pandas.core.arrays.datetimes import tz_to_dtype
  894. assert other is not NaT
  895. if isna(other):
  896. # i.e. np.datetime64("NaT")
  897. # In this case we specifically interpret NaT as a datetime, not
  898. # the timedelta interpretation we would get by returning self + NaT
  899. result = self._ndarray + NaT.to_datetime64().astype(f"M8[{self.unit}]")
  900. # Preserve our resolution
  901. return DatetimeArray._simple_new(result, dtype=result.dtype)
  902. other = Timestamp(other)
  903. self, other = self._ensure_matching_resos(other)
  904. self = cast("TimedeltaArray", self)
  905. other_i8, o_mask = self._get_i8_values_and_mask(other)
  906. result = checked_add_with_arr(
  907. self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
  908. )
  909. res_values = result.view(f"M8[{self.unit}]")
  910. dtype = tz_to_dtype(tz=other.tz, unit=self.unit)
  911. res_values = result.view(f"M8[{self.unit}]")
  912. new_freq = self._get_arithmetic_result_freq(other)
  913. return DatetimeArray._simple_new(res_values, dtype=dtype, freq=new_freq)
  914. @final
  915. def _add_datetime_arraylike(self, other: DatetimeArray) -> DatetimeArray:
  916. if not is_timedelta64_dtype(self.dtype):
  917. raise TypeError(
  918. f"cannot add {type(self).__name__} and {type(other).__name__}"
  919. )
  920. # defer to DatetimeArray.__add__
  921. return other + self
  922. @final
  923. def _sub_datetimelike_scalar(self, other: datetime | np.datetime64):
  924. if self.dtype.kind != "M":
  925. raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")
  926. self = cast("DatetimeArray", self)
  927. # subtract a datetime from myself, yielding a ndarray[timedelta64[ns]]
  928. if isna(other):
  929. # i.e. np.datetime64("NaT")
  930. return self - NaT
  931. ts = Timestamp(other)
  932. self, ts = self._ensure_matching_resos(ts)
  933. return self._sub_datetimelike(ts)
@final
def _sub_datetime_arraylike(self, other: DatetimeArray):
    # dt64 - dt64 is well-defined; anything else minus dt64 is not.
    if self.dtype.kind != "M":
        raise TypeError(f"cannot subtract a datelike from a {type(self).__name__}")

    if len(self) != len(other):
        raise ValueError("cannot add indices of unequal length")

    self = cast("DatetimeArray", self)

    # Align resolutions before delegating to the shared implementation.
    self, other = self._ensure_matching_resos(other)
    return self._sub_datetimelike(other)
@final
def _sub_datetimelike(self, other: Timestamp | DatetimeArray) -> TimedeltaArray:
    """
    Shared dt64 - datetimelike implementation; resolutions are assumed to
    already match (callers go through _ensure_matching_resos).
    """
    self = cast("DatetimeArray", self)

    from pandas.core.arrays import TimedeltaArray

    try:
        self._assert_tzawareness_compat(other)
    except TypeError as err:
        # Re-raise with a subtraction-specific message while preserving the
        # original exception type and chain.
        new_message = str(err).replace("compare", "subtract")
        raise type(err)(new_message) from err

    other_i8, o_mask = self._get_i8_values_and_mask(other)
    # NaT-aware subtraction on the i8 values.
    res_values = checked_add_with_arr(
        self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
    )
    res_m8 = res_values.view(f"timedelta64[{self.unit}]")

    new_freq = self._get_arithmetic_result_freq(other)
    return TimedeltaArray._simple_new(res_m8, dtype=res_m8.dtype, freq=new_freq)
  959. @final
  960. def _add_period(self, other: Period) -> PeriodArray:
  961. if not is_timedelta64_dtype(self.dtype):
  962. raise TypeError(f"cannot add Period to a {type(self).__name__}")
  963. # We will wrap in a PeriodArray and defer to the reversed operation
  964. from pandas.core.arrays.period import PeriodArray
  965. i8vals = np.broadcast_to(other.ordinal, self.shape)
  966. parr = PeriodArray(i8vals, freq=other.freq)
  967. return parr + self
def _add_offset(self, offset):
    # Subclasses are expected to provide the implementation.
    raise AbstractMethodError(self)
  970. def _add_timedeltalike_scalar(self, other):
  971. """
  972. Add a delta of a timedeltalike
  973. Returns
  974. -------
  975. Same type as self
  976. """
  977. if isna(other):
  978. # i.e np.timedelta64("NaT")
  979. new_values = np.empty(self.shape, dtype="i8").view(self._ndarray.dtype)
  980. new_values.fill(iNaT)
  981. return type(self)._simple_new(new_values, dtype=self.dtype)
  982. # PeriodArray overrides, so we only get here with DTA/TDA
  983. self = cast("DatetimeArray | TimedeltaArray", self)
  984. other = Timedelta(other)
  985. self, other = self._ensure_matching_resos(other)
  986. return self._add_timedeltalike(other)
def _add_timedelta_arraylike(self, other: TimedeltaArray):
    """
    Add a delta of a TimedeltaIndex

    Returns
    -------
    Same type as self
    """
    # overridden by PeriodArray
    if len(self) != len(other):
        raise ValueError("cannot add indices of unequal length")

    self = cast("DatetimeArray | TimedeltaArray", self)

    # Align resolutions before delegating to the shared implementation.
    self, other = self._ensure_matching_resos(other)
    return self._add_timedeltalike(other)
@final
def _add_timedeltalike(self, other: Timedelta | TimedeltaArray):
    """
    Shared implementation for adding a timedeltalike (scalar or array)
    whose resolution already matches self.
    """
    self = cast("DatetimeArray | TimedeltaArray", self)

    other_i8, o_mask = self._get_i8_values_and_mask(other)
    # NaT-aware addition on the i8 values; masks mark NaT slots on each side.
    new_values = checked_add_with_arr(
        self.asi8, other_i8, arr_mask=self._isnan, b_mask=o_mask
    )
    res_values = new_values.view(self._ndarray.dtype)

    new_freq = self._get_arithmetic_result_freq(other)
    return type(self)._simple_new(res_values, dtype=self.dtype, freq=new_freq)
  1010. @final
  1011. def _add_nat(self):
  1012. """
  1013. Add pd.NaT to self
  1014. """
  1015. if is_period_dtype(self.dtype):
  1016. raise TypeError(
  1017. f"Cannot add {type(self).__name__} and {type(NaT).__name__}"
  1018. )
  1019. self = cast("TimedeltaArray | DatetimeArray", self)
  1020. # GH#19124 pd.NaT is treated like a timedelta for both timedelta
  1021. # and datetime dtypes
  1022. result = np.empty(self.shape, dtype=np.int64)
  1023. result.fill(iNaT)
  1024. result = result.view(self._ndarray.dtype) # preserve reso
  1025. return type(self)._simple_new(result, dtype=self.dtype, freq=None)
  1026. @final
  1027. def _sub_nat(self):
  1028. """
  1029. Subtract pd.NaT from self
  1030. """
  1031. # GH#19124 Timedelta - datetime is not in general well-defined.
  1032. # We make an exception for pd.NaT, which in this case quacks
  1033. # like a timedelta.
  1034. # For datetime64 dtypes by convention we treat NaT as a datetime, so
  1035. # this subtraction returns a timedelta64 dtype.
  1036. # For period dtype, timedelta64 is a close-enough return dtype.
  1037. result = np.empty(self.shape, dtype=np.int64)
  1038. result.fill(iNaT)
  1039. if self.dtype.kind in ["m", "M"]:
  1040. # We can retain unit in dtype
  1041. self = cast("DatetimeArray| TimedeltaArray", self)
  1042. return result.view(f"timedelta64[{self.unit}]")
  1043. else:
  1044. return result.view("timedelta64[ns]")
@final
def _sub_periodlike(self, other: Period | PeriodArray) -> npt.NDArray[np.object_]:
    # If the operation is well-defined, we return an object-dtype ndarray
    # of DateOffsets.  Null entries are filled with pd.NaT
    if not is_period_dtype(self.dtype):
        raise TypeError(
            f"cannot subtract {type(other).__name__} from {type(self).__name__}"
        )

    self = cast("PeriodArray", self)
    # Raises on frequency mismatch.
    self._check_compatible_with(other)

    other_i8, o_mask = self._get_i8_values_and_mask(other)
    # NaT-aware ordinal difference.
    new_i8_data = checked_add_with_arr(
        self.asi8, -other_i8, arr_mask=self._isnan, b_mask=o_mask
    )
    # Convert each ordinal difference into a multiple of our base offset.
    new_data = np.array([self.freq.base * x for x in new_i8_data])

    if o_mask is None:
        # i.e. Period scalar
        mask = self._isnan
    else:
        # i.e. PeriodArray
        mask = self._isnan | o_mask
    new_data[mask] = NaT
    return new_data
@final
def _addsub_object_array(self, other: npt.NDArray[np.object_], op):
    """
    Add or subtract array-like of DateOffset objects

    Parameters
    ----------
    other : np.ndarray[object]
    op : {operator.add, operator.sub}

    Returns
    -------
    np.ndarray[object]
        Except in fastpath case with length 1 where we operate on the
        contained scalar.
    """
    assert op in [operator.add, operator.sub]
    if len(other) == 1 and self.ndim == 1:
        # Note: without this special case, we could annotate return type
        # as ndarray[object]
        # If both 1D then broadcasting is unambiguous
        return op(self, other[0])

    # Falling through to element-wise object arithmetic is slow.
    warnings.warn(
        "Adding/subtracting object-dtype array to "
        f"{type(self).__name__} not vectorized.",
        PerformanceWarning,
        stacklevel=find_stack_level(),
    )

    # Caller is responsible for broadcasting if necessary
    assert self.shape == other.shape, (self.shape, other.shape)

    res_values = op(self.astype("O"), np.asarray(other))
    return res_values
  1098. def _accumulate(self, name: str, *, skipna: bool = True, **kwargs):
  1099. if name not in {"cummin", "cummax"}:
  1100. raise TypeError(f"Accumulation {name} not supported for {type(self)}")
  1101. op = getattr(datetimelike_accumulations, name)
  1102. result = op(self.copy(), skipna=skipna, **kwargs)
  1103. return type(self)._simple_new(
  1104. result, freq=None, dtype=self.dtype # type: ignore[call-arg]
  1105. )
@unpack_zerodim_and_defer("__add__")
def __add__(self, other):
    """
    Dispatch addition by the type/dtype of ``other``; branch order is
    significant (e.g. np.timedelta64 must be caught before is_integer).
    """
    other_dtype = getattr(other, "dtype", None)
    other = ensure_wrapped_if_datetimelike(other)

    # scalar others
    if other is NaT:
        result = self._add_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        result = self._add_timedeltalike_scalar(other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._add_datetimelike_scalar(other)
    elif isinstance(other, Period) and is_timedelta64_dtype(self.dtype):
        result = self._add_period(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add)

    # array-like others
    elif is_timedelta64_dtype(other_dtype):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.add)
    elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
        # DatetimeIndex, ndarray[datetime64]
        return self._add_datetime_arraylike(other)
    elif is_integer_dtype(other_dtype):
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.add)
    else:
        # Includes Categorical, other ExtensionArrays
        # For PeriodDtype, if self is a TimedeltaArray and other is a
        # PeriodArray with a timedelta-like (i.e. Tick) freq, this
        # operation is valid. Defer to the PeriodArray implementation.
        # In remaining cases, this will end up raising TypeError.
        return NotImplemented

    # Wrap raw timedelta64 ndarray results in a TimedeltaArray.
    if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray(result)
    return result
def __radd__(self, other):
    # alias for __add__ (addition is commutative for these types)
    return self.__add__(other)
@unpack_zerodim_and_defer("__sub__")
def __sub__(self, other):
    """
    Dispatch subtraction by the type/dtype of ``other``; branch order is
    significant (e.g. np.timedelta64 must be caught before is_integer).
    """
    other_dtype = getattr(other, "dtype", None)
    other = ensure_wrapped_if_datetimelike(other)

    # scalar others
    if other is NaT:
        result = self._sub_nat()
    elif isinstance(other, (Tick, timedelta, np.timedelta64)):
        # subtracting is adding the negation
        result = self._add_timedeltalike_scalar(-other)
    elif isinstance(other, BaseOffset):
        # specifically _not_ a Tick
        result = self._add_offset(-other)
    elif isinstance(other, (datetime, np.datetime64)):
        result = self._sub_datetimelike_scalar(other)
    elif lib.is_integer(other):
        # This check must come after the check for np.timedelta64
        # as is_integer returns True for these
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub)
    elif isinstance(other, Period):
        result = self._sub_periodlike(other)

    # array-like others
    elif is_timedelta64_dtype(other_dtype):
        # TimedeltaIndex, ndarray[timedelta64]
        result = self._add_timedelta_arraylike(-other)
    elif is_object_dtype(other_dtype):
        # e.g. Array/Index of DateOffset objects
        result = self._addsub_object_array(other, operator.sub)
    elif is_datetime64_dtype(other_dtype) or is_datetime64tz_dtype(other_dtype):
        # DatetimeIndex, ndarray[datetime64]
        result = self._sub_datetime_arraylike(other)
    elif is_period_dtype(other_dtype):
        # PeriodIndex
        result = self._sub_periodlike(other)
    elif is_integer_dtype(other_dtype):
        if not is_period_dtype(self.dtype):
            raise integer_op_not_supported(self)
        obj = cast("PeriodArray", self)
        result = obj._addsub_int_array_or_scalar(other * obj.freq.n, operator.sub)
    else:
        # Includes ExtensionArrays, float_dtype
        return NotImplemented

    # Wrap raw timedelta64 ndarray results in a TimedeltaArray.
    if isinstance(result, np.ndarray) and is_timedelta64_dtype(result.dtype):
        from pandas.core.arrays import TimedeltaArray

        return TimedeltaArray(result)
    return result
def __rsub__(self, other):
    """
    Reflected subtraction (other - self); handles the cases numpy/other
    cannot, mostly by flipping the operation or raising for undefined ones.
    """
    other_dtype = getattr(other, "dtype", None)

    if is_datetime64_any_dtype(other_dtype) and is_timedelta64_dtype(self.dtype):
        # ndarray[datetime64] cannot be subtracted from self, so
        # we need to wrap in DatetimeArray/Index and flip the operation
        if lib.is_scalar(other):
            # i.e. np.datetime64 object
            return Timestamp(other) - self
        if not isinstance(other, DatetimeLikeArrayMixin):
            # Avoid down-casting DatetimeIndex
            from pandas.core.arrays import DatetimeArray

            other = DatetimeArray(other)
        return other - self
    elif (
        is_datetime64_any_dtype(self.dtype)
        and hasattr(other, "dtype")
        and not is_datetime64_any_dtype(other.dtype)
    ):
        # GH#19959 datetime - datetime is well-defined as timedelta,
        # but any other type - datetime is not well-defined.
        raise TypeError(
            f"cannot subtract {type(self).__name__} from {type(other).__name__}"
        )
    elif is_period_dtype(self.dtype) and is_timedelta64_dtype(other_dtype):
        # TODO: Can we simplify/generalize these cases at all?
        raise TypeError(f"cannot subtract {type(self).__name__} from {other.dtype}")
    elif is_timedelta64_dtype(self.dtype):
        # td64 negation is well-defined, so flip via negation.
        self = cast("TimedeltaArray", self)
        return (-self) + other

    # We get here with e.g. datetime objects
    return -(self - other)
def __iadd__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
    # In-place add: compute out-of-place, then write back into our buffer.
    result = self + other
    self[:] = result[:]

    if not is_period_dtype(self.dtype):
        # restore freq, which is invalidated by setitem
        self._freq = result.freq
    return self
def __isub__(self: DatetimeLikeArrayT, other) -> DatetimeLikeArrayT:
    # In-place subtract: compute out-of-place, then write back into our buffer.
    result = self - other
    self[:] = result[:]

    if not is_period_dtype(self.dtype):
        # restore freq, which is invalidated by setitem
        self._freq = result.freq
    return self
  1251. # --------------------------------------------------------------
  1252. # Reductions
@_period_dispatch
def _quantile(
    self: DatetimeLikeArrayT,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> DatetimeLikeArrayT:
    # Delegate to the parent implementation; @_period_dispatch handles
    # Period dtypes (see its definition earlier in this module).
    return super()._quantile(qs=qs, interpolation=interpolation)
@_period_dispatch
def min(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
    """
    Return the minimum value of the Array or minimum along
    an axis.

    See Also
    --------
    numpy.ndarray.min
    Index.min : Return the minimum value in an Index.
    Series.min : Return the minimum value in a Series.
    """
    # Reject unsupported numpy kwargs and validate axis against ndim.
    nv.validate_min((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)
@_period_dispatch
def max(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
    """
    Return the maximum value of the Array or maximum along
    an axis.

    See Also
    --------
    numpy.ndarray.max
    Index.max : Return the maximum value in an Index.
    Series.max : Return the maximum value in a Series.
    """
    # Reject unsupported numpy kwargs and validate axis against ndim.
    nv.validate_max((), kwargs)
    nv.validate_minmax_axis(axis, self.ndim)

    result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)
def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0):
    """
    Return the mean value of the Array.

    Parameters
    ----------
    skipna : bool, default True
        Whether to ignore any NaT elements.
    axis : int, optional, default 0

    Returns
    -------
    scalar
        Timestamp or Timedelta.

    See Also
    --------
    numpy.ndarray.mean : Returns the average of array elements along a given axis.
    Series.mean : Return the mean value in a Series.

    Notes
    -----
    mean is only defined for Datetime and Timedelta dtypes, not for Period.
    """
    if is_period_dtype(self.dtype):
        # See discussion in GH#24757
        raise TypeError(
            f"mean is not implemented for {type(self).__name__} since the "
            "meaning is ambiguous. An alternative is "
            "obj.to_timestamp(how='start').mean()"
        )

    # mask=self.isna() lets nanmean skip NaT positions when skipna=True.
    result = nanops.nanmean(
        self._ndarray, axis=axis, skipna=skipna, mask=self.isna()
    )
    return self._wrap_reduction_result(axis, result)
@_period_dispatch
def median(self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs):
    """
    Return the median value of the Array along the given axis.
    """
    # Reject unsupported numpy kwargs.
    nv.validate_median((), kwargs)

    if axis is not None and abs(axis) >= self.ndim:
        raise ValueError("abs(axis) must be less than ndim")

    result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
    return self._wrap_reduction_result(axis, result)
  1328. def _mode(self, dropna: bool = True):
  1329. mask = None
  1330. if dropna:
  1331. mask = self.isna()
  1332. i8modes = algorithms.mode(self.view("i8"), mask=mask)
  1333. npmodes = i8modes.view(self._ndarray.dtype)
  1334. npmodes = cast(np.ndarray, npmodes)
  1335. return self._from_backing_data(npmodes)
class DatelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for DatetimeIndex/PeriodIndex, but not TimedeltaIndex.
    """

    # %(URL)s below is filled in by @Substitution; the %% escapes keep
    # literal % characters intact through that substitution.
    @Substitution(
        URL="https://docs.python.org/3/library/datetime.html"
        "#strftime-and-strptime-behavior"
    )
    def strftime(self, date_format: str) -> npt.NDArray[np.object_]:
        """
        Convert to Index using specified date_format.

        Return an Index of formatted strings specified by date_format, which
        supports the same string format as the python standard library. Details
        of the string format can be found in `python string format
        doc <%(URL)s>`__.

        Formats supported by the C `strftime` API but not by the python string format
        doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be
        preferably replaced with their supported equivalents (such as `"%%H:%%M"`,
        `"%%I:%%M:%%S %%p"`).

        Note that `PeriodIndex` support additional directives, detailed in
        `Period.strftime`.

        Parameters
        ----------
        date_format : str
            Date format string (e.g. "%%Y-%%m-%%d").

        Returns
        -------
        ndarray[object]
            NumPy ndarray of formatted strings.

        See Also
        --------
        to_datetime : Convert the given argument to datetime.
        DatetimeIndex.normalize : Return DatetimeIndex with times to midnight.
        DatetimeIndex.round : Round the DatetimeIndex to the specified freq.
        DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq.
        Timestamp.strftime : Format a single Timestamp.
        Period.strftime : Format a single Period.

        Examples
        --------
        >>> rng = pd.date_range(pd.Timestamp("2018-03-10 09:00"),
        ...                     periods=3, freq='s')
        >>> rng.strftime('%%B %%d, %%Y, %%r')
        Index(['March 10, 2018, 09:00:00 AM', 'March 10, 2018, 09:00:01 AM',
               'March 10, 2018, 09:00:02 AM'],
              dtype='object')
        """
        # Delegate to the subclass-specific native formatter, then ensure an
        # object-dtype ndarray is returned (na_rep=np.nan fills NaT slots).
        result = self._format_native_types(date_format=date_format, na_rep=np.nan)
        return result.astype(object, copy=False)
# Shared docstring template for round/floor/ceil on datetimelike indexes;
# formatted with op in {"round", "floor", "ceil"} plus an op-specific example.
_round_doc = """
Perform {op} operation on the data to the specified `freq`.
Parameters
----------
freq : str or Offset
The frequency level to {op} the index to. Must be a fixed
frequency like 'S' (second) not 'ME' (month end). See
:ref:`frequency aliases <timeseries.offset_aliases>` for
a list of possible `freq` values.
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
Only relevant for DatetimeIndex:
- 'infer' will attempt to infer fall dst-transition hours based on
order
- bool-ndarray where True signifies a DST time, False designates
a non-DST time (note that this flag is only applicable for
ambiguous times)
- 'NaT' will return NaT where there are ambiguous times
- 'raise' will raise an AmbiguousTimeError if there are ambiguous
times.
nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, default 'raise'
A nonexistent time does not exist in a particular timezone
where clocks moved forward due to DST.
- 'shift_forward' will shift the nonexistent time forward to the
closest existing time
- 'shift_backward' will shift the nonexistent time backward to the
closest existing time
- 'NaT' will return NaT where there are nonexistent times
- timedelta objects will shift nonexistent times by the timedelta
- 'raise' will raise an NonExistentTimeError if there are
nonexistent times.
Returns
-------
DatetimeIndex, TimedeltaIndex, or Series
Index of the same type for a DatetimeIndex or TimedeltaIndex,
or a Series with the same index for a Series.
Raises
------
ValueError if the `freq` cannot be converted.
Notes
-----
If the timestamps have a timezone, {op}ing will take place relative to the
local ("wall") time and re-localized to the same timezone. When {op}ing
near daylight savings time, use ``nonexistent`` and ``ambiguous`` to
control the re-localization behavior.
Examples
--------
**DatetimeIndex**
>>> rng = pd.date_range('1/1/2018 11:59:00', periods=3, freq='min')
>>> rng
DatetimeIndex(['2018-01-01 11:59:00', '2018-01-01 12:00:00',
'2018-01-01 12:01:00'],
dtype='datetime64[ns]', freq='T')
"""
# Example text appended to _round_doc (formatted with op="round") to build
# the docstring of `TimelikeOps.round` via @Appender.
# NOTE(review): the DST portion below demonstrates `floor`, not `round`;
# this mirrors the floor example — confirm it is intentional before changing.
_round_example = """>>> rng.round('H')
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
'2018-01-01 12:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.round("H")
0 2018-01-01 12:00:00
1 2018-01-01 12:00:00
2 2018-01-01 12:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.floor("2H", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.floor("2H", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
# Example text appended to _round_doc (formatted with op="floor") to build
# the docstring of `TimelikeOps.floor` via @Appender.
_floor_example = """>>> rng.floor('H')
DatetimeIndex(['2018-01-01 11:00:00', '2018-01-01 12:00:00',
'2018-01-01 12:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.floor("H")
0 2018-01-01 11:00:00
1 2018-01-01 12:00:00
2 2018-01-01 12:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 03:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.floor("2H", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.floor("2H", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
# Example text appended to _round_doc (formatted with op="ceil") to build
# the docstring of `TimelikeOps.ceil` via @Appender.
_ceil_example = """>>> rng.ceil('H')
DatetimeIndex(['2018-01-01 12:00:00', '2018-01-01 12:00:00',
'2018-01-01 13:00:00'],
dtype='datetime64[ns]', freq=None)
**Series**
>>> pd.Series(rng).dt.ceil("H")
0 2018-01-01 12:00:00
1 2018-01-01 12:00:00
2 2018-01-01 13:00:00
dtype: datetime64[ns]
When rounding near a daylight savings time transition, use ``ambiguous`` or
``nonexistent`` to control how the timestamp should be re-localized.
>>> rng_tz = pd.DatetimeIndex(["2021-10-31 01:30:00"], tz="Europe/Amsterdam")
>>> rng_tz.ceil("H", ambiguous=False)
DatetimeIndex(['2021-10-31 02:00:00+01:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
>>> rng_tz.ceil("H", ambiguous=True)
DatetimeIndex(['2021-10-31 02:00:00+02:00'],
dtype='datetime64[ns, Europe/Amsterdam]', freq=None)
"""
# TypeVar bound to TimelikeOps so methods like `as_unit` can be annotated as
# returning the concrete subclass type.
TimelikeOpsT = TypeVar("TimelikeOpsT", bound="TimelikeOps")
class TimelikeOps(DatetimeLikeArrayMixin):
    """
    Common ops for TimedeltaIndex/DatetimeIndex, but not PeriodIndex.
    """

    # Set by concrete subclasses to their default numpy dtype; used below when
    # a dtype cannot be taken from `values` and for re-viewing i8 input.
    _default_dtype: np.dtype

    def __init__(
        self, values, dtype=None, freq=lib.no_default, copy: bool = False
    ) -> None:
        # Normalize Series/Index input down to the underlying array.
        values = extract_array(values, extract_numpy=True)
        if isinstance(values, IntegerArray):
            # Masked integer input: materialize as int64 with iNaT for missing.
            values = values.to_numpy("int64", na_value=iNaT)

        inferred_freq = getattr(values, "_freq", None)
        # freq=None is an explicit "no freq"; lib.no_default means "unspecified".
        explicit_none = freq is None
        freq = freq if freq is not lib.no_default else None

        if isinstance(values, type(self)):
            if explicit_none:
                # don't inherit from values
                pass
            elif freq is None:
                freq = values.freq
            elif freq and values.freq:
                # Both caller and `values` carry a freq: they must agree.
                freq = to_offset(freq)
                freq, _ = validate_inferred_freq(freq, values.freq, False)

            if dtype is not None:
                dtype = pandas_dtype(dtype)
                if not is_dtype_equal(dtype, values.dtype):
                    # TODO: we only have tests for this for DTA, not TDA (2022-07-01)
                    raise TypeError(
                        f"dtype={dtype} does not match data dtype {values.dtype}"
                    )

            dtype = values.dtype
            values = values._ndarray

        elif dtype is None:
            if isinstance(values, np.ndarray) and values.dtype.kind in "Mm":
                dtype = values.dtype
            else:
                dtype = self._default_dtype

        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"Unexpected type '{type(values).__name__}'. 'values' must be a "
                f"{type(self).__name__}, ndarray, or Series or Index "
                "containing one of those."
            )
        # NOTE(review): 2D input is accepted here even though the message
        # mentions only 1-dimensional arrays — confirm before "fixing" either.
        if values.ndim not in [1, 2]:
            raise ValueError("Only 1-dimensional input arrays are supported.")

        if values.dtype == "i8":
            # for compat with datetime/timedelta/period shared methods,
            # we can sometimes get here with int64 values. These represent
            # nanosecond UTC (or tz-naive) unix timestamps
            values = values.view(self._default_dtype)

        dtype = self._validate_dtype(values, dtype)

        if freq == "infer":
            raise ValueError(
                f"Frequency inference not allowed in {type(self).__name__}.__init__. "
                "Use 'pd.array()' instead."
            )

        if copy:
            values = values.copy()
        if freq:
            freq = to_offset(freq)

        NDArrayBacked.__init__(self, values=values, dtype=dtype)
        self._freq = freq

        if inferred_freq is None and freq is not None:
            # An explicitly-passed freq must conform to the actual values.
            type(self)._validate_frequency(self, freq)

    @classmethod
    def _validate_dtype(cls, values, dtype):
        # Subclass responsibility: reconcile `dtype` with `values.dtype`.
        raise AbstractMethodError(cls)

    @property
    def freq(self):
        """
        Return the frequency object if it is set, otherwise None.
        """
        return self._freq

    @freq.setter
    def freq(self, value) -> None:
        # Setting a non-None freq validates it against the stored values.
        if value is not None:
            value = to_offset(value)
            self._validate_frequency(self, value)

            if self.ndim > 1:
                raise ValueError("Cannot set freq with ndim > 1")

        self._freq = value

    @classmethod
    def _validate_frequency(cls, index, freq, **kwargs):
        """
        Validate that a frequency is compatible with the values of a given
        Datetime Array/Index or Timedelta Array/Index

        Parameters
        ----------
        index : DatetimeIndex or TimedeltaIndex
            The index on which to determine if the given frequency is valid
        freq : DateOffset
            The frequency to validate

        Raises
        ------
        ValueError
            If the values do not conform to `freq`.
        """
        inferred = index.inferred_freq
        # Empty input or an exact inferred match is trivially valid.
        if index.size == 0 or inferred == freq.freqstr:
            return None

        try:
            # Regenerate a range from index[0]; for `freq` to be valid it must
            # reproduce the original i8 values exactly.
            on_freq = cls._generate_range(
                start=index[0],
                end=None,
                periods=len(index),
                freq=freq,
                unit=index.unit,
                **kwargs,
            )
            if not np.array_equal(index.asi8, on_freq.asi8):
                raise ValueError
        except ValueError as err:
            if "non-fixed" in str(err):
                # non-fixed frequencies are not meaningful for timedelta64;
                # we retain that error message
                raise err
            # GH#11587 the main way this is reached is if the `np.array_equal`
            # check above is False. This can also be reached if index[0]
            # is `NaT`, in which case the call to `cls._generate_range` will
            # raise a ValueError, which we re-raise with a more targeted
            # message.
            raise ValueError(
                f"Inferred frequency {inferred} from passed values "
                f"does not conform to passed frequency {freq.freqstr}"
            ) from err

    @classmethod
    def _generate_range(
        cls: type[DatetimeLikeArrayT], start, end, periods, freq, *args, **kwargs
    ) -> DatetimeLikeArrayT:
        # Subclass responsibility: build a regular range of values.
        raise AbstractMethodError(cls)

    # --------------------------------------------------------------

    @cache_readonly
    def _creso(self) -> int:
        # Integer resolution code derived from the backing ndarray's dtype.
        return get_unit_from_dtype(self._ndarray.dtype)

    @cache_readonly
    def unit(self) -> str:
        # e.g. "ns", "us", "ms"
        # error: Argument 1 to "dtype_to_unit" has incompatible type
        # "ExtensionDtype"; expected "Union[DatetimeTZDtype, dtype[Any]]"
        return dtype_to_unit(self.dtype)  # type: ignore[arg-type]

    def as_unit(self: TimelikeOpsT, unit: str) -> TimelikeOpsT:
        """
        Cast to the given resolution unit ("s", "ms", "us", "ns"),
        preserving freq and (for tz-aware data) the timezone.

        Raises
        ------
        ValueError
            If `unit` is not a supported resolution.
        """
        if unit not in ["s", "ms", "us", "ns"]:
            raise ValueError("Supported units are 's', 'ms', 'us', 'ns'")

        # Same kind ("M" or "m"), new resolution; overflow-checked cast.
        dtype = np.dtype(f"{self.dtype.kind}8[{unit}]")
        new_values = astype_overflowsafe(self._ndarray, dtype, round_ok=True)

        if isinstance(self.dtype, np.dtype):
            new_dtype = new_values.dtype
        else:
            # tz-aware: rebuild the DatetimeTZDtype with the new unit.
            tz = cast("DatetimeArray", self).tz
            new_dtype = DatetimeTZDtype(tz=tz, unit=unit)

        # error: Unexpected keyword argument "freq" for "_simple_new" of
        # "NDArrayBacked" [call-arg]
        return type(self)._simple_new(
            new_values, dtype=new_dtype, freq=self.freq  # type: ignore[call-arg]
        )

    # TODO: annotate other as DatetimeArray | TimedeltaArray | Timestamp | Timedelta
    # with the return type matching input type. TypeVar?
    def _ensure_matching_resos(self, other):
        # Bring self/other to a common resolution before an operation.
        if self._creso != other._creso:
            # Just as with Timestamp/Timedelta, we cast to the higher resolution
            if self._creso < other._creso:
                self = self.as_unit(other.unit)
            else:
                other = other.as_unit(self.unit)
        return self, other

    # --------------------------------------------------------------

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # Fast path: apply the nan/inf-checking ufuncs directly to the
        # backing ndarray.
        if (
            ufunc in [np.isnan, np.isinf, np.isfinite]
            and len(inputs) == 1
            and inputs[0] is self
        ):
            # numpy 1.18 changed isinf and isnan to not raise on dt64/td64
            return getattr(ufunc, method)(self._ndarray, **kwargs)

        return super().__array_ufunc__(ufunc, method, *inputs, **kwargs)

    def _round(self, freq, mode, ambiguous, nonexistent):
        # Shared implementation behind round/floor/ceil; `mode` is the
        # RoundTo member selecting the rounding direction.
        # round the local times
        if is_datetime64tz_dtype(self.dtype):
            # operate on naive timestamps, then convert back to aware
            self = cast("DatetimeArray", self)
            naive = self.tz_localize(None)
            result = naive._round(freq, mode, ambiguous, nonexistent)
            return result.tz_localize(
                self.tz, ambiguous=ambiguous, nonexistent=nonexistent
            )

        values = self.view("i8")
        values = cast(np.ndarray, values)
        offset = to_offset(freq)
        offset.nanos  # raises on non-fixed frequencies
        nanos = delta_to_nanoseconds(offset, self._creso)
        if nanos == 0:
            # GH 52761
            return self.copy()
        result_i8 = round_nsint64(values, mode, nanos)
        # Re-insert NaT where the original values were missing.
        result = self._maybe_mask_results(result_i8, fill_value=iNaT)
        result = result.view(self._ndarray.dtype)
        return self._simple_new(result, dtype=self.dtype)

    # Docstrings for round/floor/ceil come from _round_doc via @Appender;
    # do not add inline docstrings here.
    @Appender((_round_doc + _round_example).format(op="round"))
    def round(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN, ambiguous, nonexistent)

    @Appender((_round_doc + _floor_example).format(op="floor"))
    def floor(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.MINUS_INFTY, ambiguous, nonexistent)

    @Appender((_round_doc + _ceil_example).format(op="ceil"))
    def ceil(
        self,
        freq,
        ambiguous: TimeAmbiguous = "raise",
        nonexistent: TimeNonexistent = "raise",
    ):
        return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)

    # --------------------------------------------------------------
    # Reductions

    def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())

    def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
        # GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
        return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())

    # --------------------------------------------------------------
    # Frequency Methods

    def _maybe_clear_freq(self) -> None:
        # Called when an operation may invalidate the stored freq.
        self._freq = None

    def _with_freq(self, freq):
        """
        Helper to get a view on the same data, with a new freq.

        Parameters
        ----------
        freq : DateOffset, None, or "infer"

        Returns
        -------
        Same type as self
        """
        # GH#29843
        if freq is None:
            # Always valid
            pass
        elif len(self) == 0 and isinstance(freq, BaseOffset):
            # Always valid. In the TimedeltaArray case, we assume this
            # is a Tick offset.
            pass
        else:
            # As an internal method, we can ensure this assertion always holds
            assert freq == "infer"
            freq = to_offset(self.inferred_freq)

        arr = self.view()
        arr._freq = freq
        return arr

    # --------------------------------------------------------------

    def factorize(
        self,
        use_na_sentinel: bool = True,
        sort: bool = False,
    ):
        if self.freq is not None:
            # We must be unique, so can short-circuit (and retain freq)
            codes = np.arange(len(self), dtype=np.intp)
            uniques = self.copy()  # TODO: copy or view?
            if sort and self.freq.n < 0:
                # Negative-step freq means values are descending; reverse both
                # so `uniques` comes out sorted.
                codes = codes[::-1]
                uniques = uniques[::-1]
            return codes, uniques

        # FIXME: shouldn't get here; we are ignoring sort
        return super().factorize(use_na_sentinel=use_na_sentinel)
  1768. # -------------------------------------------------------------------
  1769. # Shared Constructor Helpers
def ensure_arraylike_for_datetimelike(data, copy: bool, cls_name: str):
    """
    Coerce `data` into an ndarray or ExtensionArray usable by the
    Datetime/Timedelta Array/Index constructors.

    Parameters
    ----------
    data : list-like
    copy : bool
        Caller's copy request; set to False below whenever this function has
        already produced a fresh array.
    cls_name : str
        Name of the calling class; used only in the MultiIndex error message.

    Returns
    -------
    data : ndarray or ExtensionArray
    copy : bool

    Raises
    ------
    TypeError
        If `data` is a MultiIndex.
    """
    if not hasattr(data, "dtype"):
        # e.g. list, tuple
        if not isinstance(data, (list, tuple)) and np.ndim(data) == 0:
            # i.e. generator
            data = list(data)

        data = np.asarray(data)
        copy = False
    elif isinstance(data, ABCMultiIndex):
        raise TypeError(f"Cannot create a {cls_name} from a MultiIndex.")
    else:
        data = extract_array(data, extract_numpy=True)

    if isinstance(data, IntegerArray) or (
        isinstance(data, ArrowExtensionArray) and data.dtype.kind in "iu"
    ):
        # Masked/arrow integers: materialize as int64 with iNaT for missing.
        data = data.to_numpy("int64", na_value=iNaT)
        copy = False
    elif not isinstance(data, (np.ndarray, ExtensionArray)) or isinstance(
        data, ArrowExtensionArray
    ):
        # GH#24539 e.g. xarray, dask object
        data = np.asarray(data)

    elif isinstance(data, ABCCategorical):
        # GH#18664 preserve tz in going DTI->Categorical->DTI
        # TODO: cases where we need to do another pass through maybe_convert_dtype,
        # e.g. the categories are timedelta64s
        data = data.categories.take(data.codes, fill_value=NaT)._values
        copy = False

    return data, copy
# Typing overloads for validate_periods: None passes through unchanged,
# numeric input is narrowed to int.
@overload
def validate_periods(periods: None) -> None:
    ...


@overload
def validate_periods(periods: int | float) -> int:
    ...
  1805. def validate_periods(periods: int | float | None) -> int | None:
  1806. """
  1807. If a `periods` argument is passed to the Datetime/Timedelta Array/Index
  1808. constructor, cast it to an integer.
  1809. Parameters
  1810. ----------
  1811. periods : None, float, int
  1812. Returns
  1813. -------
  1814. periods : None or int
  1815. Raises
  1816. ------
  1817. TypeError
  1818. if periods is None, float, or int
  1819. """
  1820. if periods is not None:
  1821. if lib.is_float(periods):
  1822. periods = int(periods)
  1823. elif not lib.is_integer(periods):
  1824. raise TypeError(f"periods must be a number, got {periods}")
  1825. periods = cast(int, periods)
  1826. return periods
  1827. def validate_inferred_freq(
  1828. freq, inferred_freq, freq_infer
  1829. ) -> tuple[BaseOffset | None, bool]:
  1830. """
  1831. If the user passes a freq and another freq is inferred from passed data,
  1832. require that they match.
  1833. Parameters
  1834. ----------
  1835. freq : DateOffset or None
  1836. inferred_freq : DateOffset or None
  1837. freq_infer : bool
  1838. Returns
  1839. -------
  1840. freq : DateOffset or None
  1841. freq_infer : bool
  1842. Notes
  1843. -----
  1844. We assume at this point that `maybe_infer_freq` has been called, so
  1845. `freq` is either a DateOffset object or None.
  1846. """
  1847. if inferred_freq is not None:
  1848. if freq is not None and freq != inferred_freq:
  1849. raise ValueError(
  1850. f"Inferred frequency {inferred_freq} from passed "
  1851. "values does not conform to passed frequency "
  1852. f"{freq.freqstr}"
  1853. )
  1854. if freq is None:
  1855. freq = inferred_freq
  1856. freq_infer = False
  1857. return freq, freq_infer
  1858. def maybe_infer_freq(freq):
  1859. """
  1860. Comparing a DateOffset to the string "infer" raises, so we need to
  1861. be careful about comparisons. Make a dummy variable `freq_infer` to
  1862. signify the case where the given freq is "infer" and set freq to None
  1863. to avoid comparison trouble later on.
  1864. Parameters
  1865. ----------
  1866. freq : {DateOffset, None, str}
  1867. Returns
  1868. -------
  1869. freq : {DateOffset, None}
  1870. freq_infer : bool
  1871. Whether we should inherit the freq of passed data.
  1872. """
  1873. freq_infer = False
  1874. if not isinstance(freq, BaseOffset):
  1875. # if a passed freq is None, don't infer automatically
  1876. if freq != "infer":
  1877. freq = to_offset(freq)
  1878. else:
  1879. freq_infer = True
  1880. freq = None
  1881. return freq, freq_infer
  1882. def dtype_to_unit(dtype: DatetimeTZDtype | np.dtype) -> str:
  1883. """
  1884. Return the unit str corresponding to the dtype's resolution.
  1885. Parameters
  1886. ----------
  1887. dtype : DatetimeTZDtype or np.dtype
  1888. If np.dtype, we assume it is a datetime64 dtype.
  1889. Returns
  1890. -------
  1891. str
  1892. """
  1893. if isinstance(dtype, DatetimeTZDtype):
  1894. return dtype.unit
  1895. return np.datetime_data(dtype)[0]