datetimes.py 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064
  1. from __future__ import annotations
  2. import datetime as dt
  3. import operator
  4. from typing import (
  5. TYPE_CHECKING,
  6. Hashable,
  7. )
  8. import warnings
  9. import numpy as np
  10. import pytz
  11. from pandas._libs import (
  12. NaT,
  13. Period,
  14. Timestamp,
  15. index as libindex,
  16. lib,
  17. )
  18. from pandas._libs.tslibs import (
  19. Resolution,
  20. periods_per_day,
  21. timezones,
  22. to_offset,
  23. )
  24. from pandas._libs.tslibs.offsets import prefix_mapping
  25. from pandas._typing import (
  26. Dtype,
  27. DtypeObj,
  28. Frequency,
  29. IntervalClosedType,
  30. TimeAmbiguous,
  31. TimeNonexistent,
  32. npt,
  33. )
  34. from pandas.util._decorators import (
  35. cache_readonly,
  36. doc,
  37. )
  38. from pandas.core.dtypes.common import (
  39. is_datetime64_dtype,
  40. is_datetime64tz_dtype,
  41. is_scalar,
  42. )
  43. from pandas.core.dtypes.generic import ABCSeries
  44. from pandas.core.dtypes.missing import is_valid_na_for_dtype
  45. from pandas.core.arrays.datetimes import (
  46. DatetimeArray,
  47. tz_to_dtype,
  48. )
  49. import pandas.core.common as com
  50. from pandas.core.indexes.base import (
  51. Index,
  52. maybe_extract_name,
  53. )
  54. from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
  55. from pandas.core.indexes.extension import inherit_names
  56. from pandas.core.tools.times import to_time
  57. if TYPE_CHECKING:
  58. from pandas.core.api import (
  59. DataFrame,
  60. PeriodIndex,
  61. )
  62. def _new_DatetimeIndex(cls, d):
  63. """
  64. This is called upon unpickling, rather than the default which doesn't
  65. have arguments and breaks __new__
  66. """
  67. if "data" in d and not isinstance(d["data"], DatetimeIndex):
  68. # Avoid need to verify integrity by calling simple_new directly
  69. data = d.pop("data")
  70. if not isinstance(data, DatetimeArray):
  71. # For backward compat with older pickles, we may need to construct
  72. # a DatetimeArray to adapt to the newer _simple_new signature
  73. tz = d.pop("tz")
  74. freq = d.pop("freq")
  75. dta = DatetimeArray._simple_new(data, dtype=tz_to_dtype(tz), freq=freq)
  76. else:
  77. dta = data
  78. for key in ["tz", "freq"]:
  79. # These are already stored in our DatetimeArray; if they are
  80. # also in the pickle and don't match, we have a problem.
  81. if key in d:
  82. assert d[key] == getattr(dta, key)
  83. d.pop(key)
  84. result = cls._simple_new(dta, **d)
  85. else:
  86. with warnings.catch_warnings():
  87. # TODO: If we knew what was going in to **d, we might be able to
  88. # go through _simple_new instead
  89. warnings.simplefilter("ignore")
  90. result = cls.__new__(cls, **d)
  91. return result
  92. @inherit_names(
  93. DatetimeArray._field_ops
  94. + [
  95. method
  96. for method in DatetimeArray._datetimelike_methods
  97. if method not in ("tz_localize", "tz_convert", "strftime")
  98. ],
  99. DatetimeArray,
  100. wrap=True,
  101. )
  102. @inherit_names(["is_normalized"], DatetimeArray, cache=True)
  103. @inherit_names(
  104. [
  105. "tz",
  106. "tzinfo",
  107. "dtype",
  108. "to_pydatetime",
  109. "_format_native_types",
  110. "date",
  111. "time",
  112. "timetz",
  113. "std",
  114. ]
  115. + DatetimeArray._bool_ops,
  116. DatetimeArray,
  117. )
  118. class DatetimeIndex(DatetimeTimedeltaMixin):
  119. """
  120. Immutable ndarray-like of datetime64 data.
  121. Represented internally as int64, and which can be boxed to Timestamp objects
  122. that are subclasses of datetime and carry metadata.
  123. .. versionchanged:: 2.0.0
  124. The various numeric date/time attributes (:attr:`~DatetimeIndex.day`,
  125. :attr:`~DatetimeIndex.month`, :attr:`~DatetimeIndex.year` etc.) now have dtype
  126. ``int32``. Previously they had dtype ``int64``.
  127. Parameters
  128. ----------
  129. data : array-like (1-dimensional)
  130. Datetime-like data to construct index with.
  131. freq : str or pandas offset object, optional
  132. One of pandas date offset strings or corresponding objects. The string
  133. 'infer' can be passed in order to set the frequency of the index as the
  134. inferred frequency upon creation.
  135. tz : pytz.timezone or dateutil.tz.tzfile or datetime.tzinfo or str
  136. Set the Timezone of the data.
  137. normalize : bool, default False
  138. Normalize start/end dates to midnight before generating date range.
  139. closed : {'left', 'right'}, optional
  140. Set whether to include `start` and `end` that are on the
  141. boundary. The default includes boundary points on either end.
  142. ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
  143. When clocks moved backward due to DST, ambiguous times may arise.
  144. For example in Central European Time (UTC+01), when going from 03:00
  145. DST to 02:00 non-DST, 02:30:00 local time occurs both at 00:30:00 UTC
  146. and at 01:30:00 UTC. In such a situation, the `ambiguous` parameter
  147. dictates how ambiguous times should be handled.
  148. - 'infer' will attempt to infer fall dst-transition hours based on
  149. order
  150. - bool-ndarray where True signifies a DST time, False signifies a
  151. non-DST time (note that this flag is only applicable for ambiguous
  152. times)
  153. - 'NaT' will return NaT where there are ambiguous times
  154. - 'raise' will raise an AmbiguousTimeError if there are ambiguous times.
  155. dayfirst : bool, default False
  156. If True, parse dates in `data` with the day first order.
  157. yearfirst : bool, default False
  158. If True parse dates in `data` with the year first order.
  159. dtype : numpy.dtype or DatetimeTZDtype or str, default None
  160. Note that the only NumPy dtype allowed is ‘datetime64[ns]’.
  161. copy : bool, default False
  162. Make a copy of input ndarray.
  163. name : label, default None
  164. Name to be stored in the index.
  165. Attributes
  166. ----------
  167. year
  168. month
  169. day
  170. hour
  171. minute
  172. second
  173. microsecond
  174. nanosecond
  175. date
  176. time
  177. timetz
  178. dayofyear
  179. day_of_year
  180. weekofyear
  181. week
  182. dayofweek
  183. day_of_week
  184. weekday
  185. quarter
  186. tz
  187. freq
  188. freqstr
  189. is_month_start
  190. is_month_end
  191. is_quarter_start
  192. is_quarter_end
  193. is_year_start
  194. is_year_end
  195. is_leap_year
  196. inferred_freq
  197. Methods
  198. -------
  199. normalize
  200. strftime
  201. snap
  202. tz_convert
  203. tz_localize
  204. round
  205. floor
  206. ceil
  207. to_period
  208. to_pydatetime
  209. to_series
  210. to_frame
  211. month_name
  212. day_name
  213. mean
  214. std
  215. See Also
  216. --------
  217. Index : The base pandas Index type.
  218. TimedeltaIndex : Index of timedelta64 data.
  219. PeriodIndex : Index of Period data.
  220. to_datetime : Convert argument to datetime.
  221. date_range : Create a fixed-frequency DatetimeIndex.
  222. Notes
  223. -----
  224. To learn more about the frequency strings, please see `this link
  225. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  226. """
  227. _typ = "datetimeindex"
  228. _data_cls = DatetimeArray
  229. _supports_partial_string_indexing = True
  230. @property
  231. def _engine_type(self) -> type[libindex.DatetimeEngine]:
  232. return libindex.DatetimeEngine
  233. _data: DatetimeArray
  234. tz: dt.tzinfo | None
  235. # --------------------------------------------------------------------
  236. # methods that dispatch to DatetimeArray and wrap result
  237. @doc(DatetimeArray.strftime)
  238. def strftime(self, date_format) -> Index:
  239. arr = self._data.strftime(date_format)
  240. return Index(arr, name=self.name, dtype=object)
  241. @doc(DatetimeArray.tz_convert)
  242. def tz_convert(self, tz) -> DatetimeIndex:
  243. arr = self._data.tz_convert(tz)
  244. return type(self)._simple_new(arr, name=self.name, refs=self._references)
  245. @doc(DatetimeArray.tz_localize)
  246. def tz_localize(
  247. self,
  248. tz,
  249. ambiguous: TimeAmbiguous = "raise",
  250. nonexistent: TimeNonexistent = "raise",
  251. ) -> DatetimeIndex:
  252. arr = self._data.tz_localize(tz, ambiguous, nonexistent)
  253. return type(self)._simple_new(arr, name=self.name)
  254. @doc(DatetimeArray.to_period)
  255. def to_period(self, freq=None) -> PeriodIndex:
  256. from pandas.core.indexes.api import PeriodIndex
  257. arr = self._data.to_period(freq)
  258. return PeriodIndex._simple_new(arr, name=self.name)
  259. @doc(DatetimeArray.to_julian_date)
  260. def to_julian_date(self) -> Index:
  261. arr = self._data.to_julian_date()
  262. return Index._simple_new(arr, name=self.name)
  263. @doc(DatetimeArray.isocalendar)
  264. def isocalendar(self) -> DataFrame:
  265. df = self._data.isocalendar()
  266. return df.set_index(self)
  267. @cache_readonly
  268. def _resolution_obj(self) -> Resolution:
  269. return self._data._resolution_obj
  270. # --------------------------------------------------------------------
  271. # Constructors
  272. def __new__(
  273. cls,
  274. data=None,
  275. freq: Frequency | lib.NoDefault = lib.no_default,
  276. tz=lib.no_default,
  277. normalize: bool = False,
  278. closed=None,
  279. ambiguous: TimeAmbiguous = "raise",
  280. dayfirst: bool = False,
  281. yearfirst: bool = False,
  282. dtype: Dtype | None = None,
  283. copy: bool = False,
  284. name: Hashable = None,
  285. ) -> DatetimeIndex:
  286. if is_scalar(data):
  287. cls._raise_scalar_data_error(data)
  288. # - Cases checked above all return/raise before reaching here - #
  289. name = maybe_extract_name(name, data, cls)
  290. if (
  291. isinstance(data, DatetimeArray)
  292. and freq is lib.no_default
  293. and tz is lib.no_default
  294. and dtype is None
  295. ):
  296. # fastpath, similar logic in TimedeltaIndex.__new__;
  297. # Note in this particular case we retain non-nano.
  298. if copy:
  299. data = data.copy()
  300. return cls._simple_new(data, name=name)
  301. dtarr = DatetimeArray._from_sequence_not_strict(
  302. data,
  303. dtype=dtype,
  304. copy=copy,
  305. tz=tz,
  306. freq=freq,
  307. dayfirst=dayfirst,
  308. yearfirst=yearfirst,
  309. ambiguous=ambiguous,
  310. )
  311. refs = None
  312. if not copy and isinstance(data, (Index, ABCSeries)):
  313. refs = data._references
  314. subarr = cls._simple_new(dtarr, name=name, refs=refs)
  315. return subarr
  316. # --------------------------------------------------------------------
  317. @cache_readonly
  318. def _is_dates_only(self) -> bool:
  319. """
  320. Return a boolean if we are only dates (and don't have a timezone)
  321. Returns
  322. -------
  323. bool
  324. """
  325. from pandas.io.formats.format import is_dates_only
  326. # error: Argument 1 to "is_dates_only" has incompatible type
  327. # "Union[ExtensionArray, ndarray]"; expected "Union[ndarray,
  328. # DatetimeArray, Index, DatetimeIndex]"
  329. return self.tz is None and is_dates_only(self._values) # type: ignore[arg-type]
  330. def __reduce__(self):
  331. d = {"data": self._data, "name": self.name}
  332. return _new_DatetimeIndex, (type(self), d), None
  333. def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
  334. """
  335. Can we compare values of the given dtype to our own?
  336. """
  337. if self.tz is not None:
  338. # If we have tz, we can compare to tzaware
  339. return is_datetime64tz_dtype(dtype)
  340. # if we dont have tz, we can only compare to tznaive
  341. return is_datetime64_dtype(dtype)
  342. # --------------------------------------------------------------------
  343. # Rendering Methods
  344. @property
  345. def _formatter_func(self):
  346. from pandas.io.formats.format import get_format_datetime64
  347. formatter = get_format_datetime64(is_dates_only_=self._is_dates_only)
  348. return lambda x: f"'{formatter(x)}'"
  349. # --------------------------------------------------------------------
  350. # Set Operation Methods
  351. def _can_range_setop(self, other) -> bool:
  352. # GH 46702: If self or other have non-UTC tzs, DST transitions prevent
  353. # range representation due to no singular step
  354. if (
  355. self.tz is not None
  356. and not timezones.is_utc(self.tz)
  357. and not timezones.is_fixed_offset(self.tz)
  358. ):
  359. return False
  360. if (
  361. other.tz is not None
  362. and not timezones.is_utc(other.tz)
  363. and not timezones.is_fixed_offset(other.tz)
  364. ):
  365. return False
  366. return super()._can_range_setop(other)
  367. # --------------------------------------------------------------------
  368. def _get_time_micros(self) -> npt.NDArray[np.int64]:
  369. """
  370. Return the number of microseconds since midnight.
  371. Returns
  372. -------
  373. ndarray[int64_t]
  374. """
  375. values = self._data._local_timestamps()
  376. ppd = periods_per_day(self._data._creso)
  377. frac = values % ppd
  378. if self.unit == "ns":
  379. micros = frac // 1000
  380. elif self.unit == "us":
  381. micros = frac
  382. elif self.unit == "ms":
  383. micros = frac * 1000
  384. elif self.unit == "s":
  385. micros = frac * 1_000_000
  386. else: # pragma: no cover
  387. raise NotImplementedError(self.unit)
  388. micros[self._isnan] = -1
  389. return micros
  390. def snap(self, freq: Frequency = "S") -> DatetimeIndex:
  391. """
  392. Snap time stamps to nearest occurring frequency.
  393. Returns
  394. -------
  395. DatetimeIndex
  396. """
  397. # Superdumb, punting on any optimizing
  398. freq = to_offset(freq)
  399. dta = self._data.copy()
  400. for i, v in enumerate(self):
  401. s = v
  402. if not freq.is_on_offset(s):
  403. t0 = freq.rollback(s)
  404. t1 = freq.rollforward(s)
  405. if abs(s - t0) < abs(t1 - s):
  406. s = t0
  407. else:
  408. s = t1
  409. dta[i] = s
  410. return DatetimeIndex._simple_new(dta, name=self.name)
  411. # --------------------------------------------------------------------
  412. # Indexing Methods
  413. def _parsed_string_to_bounds(self, reso: Resolution, parsed: dt.datetime):
  414. """
  415. Calculate datetime bounds for parsed time string and its resolution.
  416. Parameters
  417. ----------
  418. reso : Resolution
  419. Resolution provided by parsed string.
  420. parsed : datetime
  421. Datetime from parsed string.
  422. Returns
  423. -------
  424. lower, upper: pd.Timestamp
  425. """
  426. per = Period(parsed, freq=reso.attr_abbrev)
  427. start, end = per.start_time, per.end_time
  428. # GH 24076
  429. # If an incoming date string contained a UTC offset, need to localize
  430. # the parsed date to this offset first before aligning with the index's
  431. # timezone
  432. start = start.tz_localize(parsed.tzinfo)
  433. end = end.tz_localize(parsed.tzinfo)
  434. if parsed.tzinfo is not None:
  435. if self.tz is None:
  436. raise ValueError(
  437. "The index must be timezone aware when indexing "
  438. "with a date string with a UTC offset"
  439. )
  440. # The flipped case with parsed.tz is None and self.tz is not None
  441. # is ruled out bc parsed and reso are produced by _parse_with_reso,
  442. # which localizes parsed.
  443. return start, end
  444. def _parse_with_reso(self, label: str):
  445. parsed, reso = super()._parse_with_reso(label)
  446. parsed = Timestamp(parsed)
  447. if self.tz is not None and parsed.tzinfo is None:
  448. # we special-case timezone-naive strings and timezone-aware
  449. # DatetimeIndex
  450. # https://github.com/pandas-dev/pandas/pull/36148#issuecomment-687883081
  451. parsed = parsed.tz_localize(self.tz)
  452. return parsed, reso
  453. def _disallow_mismatched_indexing(self, key) -> None:
  454. """
  455. Check for mismatched-tzawareness indexing and re-raise as KeyError.
  456. """
  457. # we get here with isinstance(key, self._data._recognized_scalars)
  458. try:
  459. # GH#36148
  460. self._data._assert_tzawareness_compat(key)
  461. except TypeError as err:
  462. raise KeyError(key) from err
  463. def get_loc(self, key):
  464. """
  465. Get integer location for requested label
  466. Returns
  467. -------
  468. loc : int
  469. """
  470. self._check_indexing_error(key)
  471. orig_key = key
  472. if is_valid_na_for_dtype(key, self.dtype):
  473. key = NaT
  474. if isinstance(key, self._data._recognized_scalars):
  475. # needed to localize naive datetimes
  476. self._disallow_mismatched_indexing(key)
  477. key = Timestamp(key)
  478. elif isinstance(key, str):
  479. try:
  480. parsed, reso = self._parse_with_reso(key)
  481. except (ValueError, pytz.NonExistentTimeError) as err:
  482. raise KeyError(key) from err
  483. self._disallow_mismatched_indexing(parsed)
  484. if self._can_partial_date_slice(reso):
  485. try:
  486. return self._partial_date_slice(reso, parsed)
  487. except KeyError as err:
  488. raise KeyError(key) from err
  489. key = parsed
  490. elif isinstance(key, dt.timedelta):
  491. # GH#20464
  492. raise TypeError(
  493. f"Cannot index {type(self).__name__} with {type(key).__name__}"
  494. )
  495. elif isinstance(key, dt.time):
  496. return self.indexer_at_time(key)
  497. else:
  498. # unrecognized type
  499. raise KeyError(key)
  500. try:
  501. return Index.get_loc(self, key)
  502. except KeyError as err:
  503. raise KeyError(orig_key) from err
  504. @doc(DatetimeTimedeltaMixin._maybe_cast_slice_bound)
  505. def _maybe_cast_slice_bound(self, label, side: str):
  506. # GH#42855 handle date here instead of get_slice_bound
  507. if isinstance(label, dt.date) and not isinstance(label, dt.datetime):
  508. # Pandas supports slicing with dates, treated as datetimes at midnight.
  509. # https://github.com/pandas-dev/pandas/issues/31501
  510. label = Timestamp(label).to_pydatetime()
  511. label = super()._maybe_cast_slice_bound(label, side)
  512. self._data._assert_tzawareness_compat(label)
  513. return Timestamp(label)
  514. def slice_indexer(self, start=None, end=None, step=None):
  515. """
  516. Return indexer for specified label slice.
  517. Index.slice_indexer, customized to handle time slicing.
  518. In addition to functionality provided by Index.slice_indexer, does the
  519. following:
  520. - if both `start` and `end` are instances of `datetime.time`, it
  521. invokes `indexer_between_time`
  522. - if `start` and `end` are both either string or None perform
  523. value-based selection in non-monotonic cases.
  524. """
  525. # For historical reasons DatetimeIndex supports slices between two
  526. # instances of datetime.time as if it were applying a slice mask to
  527. # an array of (self.hour, self.minute, self.seconds, self.microsecond).
  528. if isinstance(start, dt.time) and isinstance(end, dt.time):
  529. if step is not None and step != 1:
  530. raise ValueError("Must have step size of 1 with time slices")
  531. return self.indexer_between_time(start, end)
  532. if isinstance(start, dt.time) or isinstance(end, dt.time):
  533. raise KeyError("Cannot mix time and non-time slice keys")
  534. def check_str_or_none(point) -> bool:
  535. return point is not None and not isinstance(point, str)
  536. # GH#33146 if start and end are combinations of str and None and Index is not
  537. # monotonic, we can not use Index.slice_indexer because it does not honor the
  538. # actual elements, is only searching for start and end
  539. if (
  540. check_str_or_none(start)
  541. or check_str_or_none(end)
  542. or self.is_monotonic_increasing
  543. ):
  544. return Index.slice_indexer(self, start, end, step)
  545. mask = np.array(True)
  546. raise_mask = np.array(True)
  547. if start is not None:
  548. start_casted = self._maybe_cast_slice_bound(start, "left")
  549. mask = start_casted <= self
  550. raise_mask = start_casted == self
  551. if end is not None:
  552. end_casted = self._maybe_cast_slice_bound(end, "right")
  553. mask = (self <= end_casted) & mask
  554. raise_mask = (end_casted == self) | raise_mask
  555. if not raise_mask.any():
  556. raise KeyError(
  557. "Value based partial slicing on non-monotonic DatetimeIndexes "
  558. "with non-existing keys is not allowed.",
  559. )
  560. indexer = mask.nonzero()[0][::step]
  561. if len(indexer) == len(self):
  562. return slice(None)
  563. else:
  564. return indexer
  565. # --------------------------------------------------------------------
  566. @property
  567. def inferred_type(self) -> str:
  568. # b/c datetime is represented as microseconds since the epoch, make
  569. # sure we can't have ambiguous indexing
  570. return "datetime64"
  571. def indexer_at_time(self, time, asof: bool = False) -> npt.NDArray[np.intp]:
  572. """
  573. Return index locations of values at particular time of day.
  574. Parameters
  575. ----------
  576. time : datetime.time or str
  577. Time passed in either as object (datetime.time) or as string in
  578. appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
  579. "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", "%I%M%S%p").
  580. Returns
  581. -------
  582. np.ndarray[np.intp]
  583. See Also
  584. --------
  585. indexer_between_time : Get index locations of values between particular
  586. times of day.
  587. DataFrame.at_time : Select values at particular time of day.
  588. """
  589. if asof:
  590. raise NotImplementedError("'asof' argument is not supported")
  591. if isinstance(time, str):
  592. from dateutil.parser import parse
  593. time = parse(time).time()
  594. if time.tzinfo:
  595. if self.tz is None:
  596. raise ValueError("Index must be timezone aware.")
  597. time_micros = self.tz_convert(time.tzinfo)._get_time_micros()
  598. else:
  599. time_micros = self._get_time_micros()
  600. micros = _time_to_micros(time)
  601. return (time_micros == micros).nonzero()[0]
  602. def indexer_between_time(
  603. self, start_time, end_time, include_start: bool = True, include_end: bool = True
  604. ) -> npt.NDArray[np.intp]:
  605. """
  606. Return index locations of values between particular times of day.
  607. Parameters
  608. ----------
  609. start_time, end_time : datetime.time, str
  610. Time passed either as object (datetime.time) or as string in
  611. appropriate format ("%H:%M", "%H%M", "%I:%M%p", "%I%M%p",
  612. "%H:%M:%S", "%H%M%S", "%I:%M:%S%p","%I%M%S%p").
  613. include_start : bool, default True
  614. include_end : bool, default True
  615. Returns
  616. -------
  617. np.ndarray[np.intp]
  618. See Also
  619. --------
  620. indexer_at_time : Get index locations of values at particular time of day.
  621. DataFrame.between_time : Select values between particular times of day.
  622. """
  623. start_time = to_time(start_time)
  624. end_time = to_time(end_time)
  625. time_micros = self._get_time_micros()
  626. start_micros = _time_to_micros(start_time)
  627. end_micros = _time_to_micros(end_time)
  628. if include_start and include_end:
  629. lop = rop = operator.le
  630. elif include_start:
  631. lop = operator.le
  632. rop = operator.lt
  633. elif include_end:
  634. lop = operator.lt
  635. rop = operator.le
  636. else:
  637. lop = rop = operator.lt
  638. if start_time <= end_time:
  639. join_op = operator.and_
  640. else:
  641. join_op = operator.or_
  642. mask = join_op(lop(start_micros, time_micros), rop(time_micros, end_micros))
  643. return mask.nonzero()[0]
  644. def date_range(
  645. start=None,
  646. end=None,
  647. periods=None,
  648. freq=None,
  649. tz=None,
  650. normalize: bool = False,
  651. name: Hashable = None,
  652. inclusive: IntervalClosedType = "both",
  653. *,
  654. unit: str | None = None,
  655. **kwargs,
  656. ) -> DatetimeIndex:
  657. """
  658. Return a fixed frequency DatetimeIndex.
  659. Returns the range of equally spaced time points (where the difference between any
  660. two adjacent points is specified by the given frequency) such that they all
  661. satisfy `start <[=] x <[=] end`, where the first one and the last one are, resp.,
  662. the first and last time points in that range that fall on the boundary of ``freq``
  663. (if given as a frequency string) or that are valid for ``freq`` (if given as a
  664. :class:`pandas.tseries.offsets.DateOffset`). (If exactly one of ``start``,
  665. ``end``, or ``freq`` is *not* specified, this missing parameter can be computed
  666. given ``periods``, the number of timesteps in the range. See the note below.)
  667. Parameters
  668. ----------
  669. start : str or datetime-like, optional
  670. Left bound for generating dates.
  671. end : str or datetime-like, optional
  672. Right bound for generating dates.
  673. periods : int, optional
  674. Number of periods to generate.
  675. freq : str, datetime.timedelta, or DateOffset, default 'D'
  676. Frequency strings can have multiples, e.g. '5H'. See
  677. :ref:`here <timeseries.offset_aliases>` for a list of
  678. frequency aliases.
  679. tz : str or tzinfo, optional
  680. Time zone name for returning localized DatetimeIndex, for example
  681. 'Asia/Hong_Kong'. By default, the resulting DatetimeIndex is
  682. timezone-naive unless timezone-aware datetime-likes are passed.
  683. normalize : bool, default False
  684. Normalize start/end dates to midnight before generating date range.
  685. name : str, default None
  686. Name of the resulting DatetimeIndex.
  687. inclusive : {"both", "neither", "left", "right"}, default "both"
  688. Include boundaries; Whether to set each bound as closed or open.
  689. .. versionadded:: 1.4.0
  690. unit : str, default None
  691. Specify the desired resolution of the result.
  692. .. versionadded:: 2.0.0
  693. **kwargs
  694. For compatibility. Has no effect on the result.
  695. Returns
  696. -------
  697. DatetimeIndex
  698. See Also
  699. --------
  700. DatetimeIndex : An immutable container for datetimes.
  701. timedelta_range : Return a fixed frequency TimedeltaIndex.
  702. period_range : Return a fixed frequency PeriodIndex.
  703. interval_range : Return a fixed frequency IntervalIndex.
  704. Notes
  705. -----
  706. Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
  707. exactly three must be specified. If ``freq`` is omitted, the resulting
  708. ``DatetimeIndex`` will have ``periods`` linearly spaced elements between
  709. ``start`` and ``end`` (closed on both sides).
  710. To learn more about the frequency strings, please see `this link
  711. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  712. Examples
  713. --------
  714. **Specifying the values**
  715. The next four examples generate the same `DatetimeIndex`, but vary
  716. the combination of `start`, `end` and `periods`.
  717. Specify `start` and `end`, with the default daily frequency.
  718. >>> pd.date_range(start='1/1/2018', end='1/08/2018')
  719. DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
  720. '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
  721. dtype='datetime64[ns]', freq='D')
  722. Specify timezone-aware `start` and `end`, with the default daily frequency.
  723. >>> pd.date_range(
  724. ... start=pd.to_datetime("1/1/2018").tz_localize("Europe/Berlin"),
  725. ... end=pd.to_datetime("1/08/2018").tz_localize("Europe/Berlin"),
  726. ... )
  727. DatetimeIndex(['2018-01-01 00:00:00+01:00', '2018-01-02 00:00:00+01:00',
  728. '2018-01-03 00:00:00+01:00', '2018-01-04 00:00:00+01:00',
  729. '2018-01-05 00:00:00+01:00', '2018-01-06 00:00:00+01:00',
  730. '2018-01-07 00:00:00+01:00', '2018-01-08 00:00:00+01:00'],
  731. dtype='datetime64[ns, Europe/Berlin]', freq='D')
  732. Specify `start` and `periods`, the number of periods (days).
  733. >>> pd.date_range(start='1/1/2018', periods=8)
  734. DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
  735. '2018-01-05', '2018-01-06', '2018-01-07', '2018-01-08'],
  736. dtype='datetime64[ns]', freq='D')
  737. Specify `end` and `periods`, the number of periods (days).
  738. >>> pd.date_range(end='1/1/2018', periods=8)
  739. DatetimeIndex(['2017-12-25', '2017-12-26', '2017-12-27', '2017-12-28',
  740. '2017-12-29', '2017-12-30', '2017-12-31', '2018-01-01'],
  741. dtype='datetime64[ns]', freq='D')
  742. Specify `start`, `end`, and `periods`; the frequency is generated
  743. automatically (linearly spaced).
  744. >>> pd.date_range(start='2018-04-24', end='2018-04-27', periods=3)
  745. DatetimeIndex(['2018-04-24 00:00:00', '2018-04-25 12:00:00',
  746. '2018-04-27 00:00:00'],
  747. dtype='datetime64[ns]', freq=None)
  748. **Other Parameters**
  749. Changed the `freq` (frequency) to ``'M'`` (month end frequency).
  750. >>> pd.date_range(start='1/1/2018', periods=5, freq='M')
  751. DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31', '2018-04-30',
  752. '2018-05-31'],
  753. dtype='datetime64[ns]', freq='M')
  754. Multiples are allowed
  755. >>> pd.date_range(start='1/1/2018', periods=5, freq='3M')
  756. DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
  757. '2019-01-31'],
  758. dtype='datetime64[ns]', freq='3M')
  759. `freq` can also be specified as an Offset object.
  760. >>> pd.date_range(start='1/1/2018', periods=5, freq=pd.offsets.MonthEnd(3))
  761. DatetimeIndex(['2018-01-31', '2018-04-30', '2018-07-31', '2018-10-31',
  762. '2019-01-31'],
  763. dtype='datetime64[ns]', freq='3M')
  764. Specify `tz` to set the timezone.
  765. >>> pd.date_range(start='1/1/2018', periods=5, tz='Asia/Tokyo')
  766. DatetimeIndex(['2018-01-01 00:00:00+09:00', '2018-01-02 00:00:00+09:00',
  767. '2018-01-03 00:00:00+09:00', '2018-01-04 00:00:00+09:00',
  768. '2018-01-05 00:00:00+09:00'],
  769. dtype='datetime64[ns, Asia/Tokyo]', freq='D')
  770. `inclusive` controls whether to include `start` and `end` that are on the
  771. boundary. The default, "both", includes boundary points on either end.
  772. >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive="both")
  773. DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03', '2017-01-04'],
  774. dtype='datetime64[ns]', freq='D')
  775. Use ``inclusive='left'`` to exclude `end` if it falls on the boundary.
  776. >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='left')
  777. DatetimeIndex(['2017-01-01', '2017-01-02', '2017-01-03'],
  778. dtype='datetime64[ns]', freq='D')
  779. Use ``inclusive='right'`` to exclude `start` if it falls on the boundary, and
  780. similarly ``inclusive='neither'`` will exclude both `start` and `end`.
  781. >>> pd.date_range(start='2017-01-01', end='2017-01-04', inclusive='right')
  782. DatetimeIndex(['2017-01-02', '2017-01-03', '2017-01-04'],
  783. dtype='datetime64[ns]', freq='D')
  784. **Specify a unit**
  785. >>> pd.date_range(start="2017-01-01", periods=10, freq="100AS", unit="s")
  786. DatetimeIndex(['2017-01-01', '2117-01-01', '2217-01-01', '2317-01-01',
  787. '2417-01-01', '2517-01-01', '2617-01-01', '2717-01-01',
  788. '2817-01-01', '2917-01-01'],
  789. dtype='datetime64[s]', freq='100AS-JAN')
  790. """
  791. if freq is None and com.any_none(periods, start, end):
  792. freq = "D"
  793. dtarr = DatetimeArray._generate_range(
  794. start=start,
  795. end=end,
  796. periods=periods,
  797. freq=freq,
  798. tz=tz,
  799. normalize=normalize,
  800. inclusive=inclusive,
  801. unit=unit,
  802. **kwargs,
  803. )
  804. return DatetimeIndex._simple_new(dtarr, name=name)
  805. def bdate_range(
  806. start=None,
  807. end=None,
  808. periods: int | None = None,
  809. freq: Frequency = "B",
  810. tz=None,
  811. normalize: bool = True,
  812. name: Hashable = None,
  813. weekmask=None,
  814. holidays=None,
  815. inclusive: IntervalClosedType = "both",
  816. **kwargs,
  817. ) -> DatetimeIndex:
  818. """
  819. Return a fixed frequency DatetimeIndex with business day as the default.
  820. Parameters
  821. ----------
  822. start : str or datetime-like, default None
  823. Left bound for generating dates.
  824. end : str or datetime-like, default None
  825. Right bound for generating dates.
  826. periods : int, default None
  827. Number of periods to generate.
  828. freq : str, Timedelta, datetime.timedelta, or DateOffset, default 'B'
  829. Frequency strings can have multiples, e.g. '5H'. The default is
  830. business daily ('B').
  831. tz : str or None
  832. Time zone name for returning localized DatetimeIndex, for example
  833. Asia/Beijing.
  834. normalize : bool, default False
  835. Normalize start/end dates to midnight before generating date range.
  836. name : str, default None
  837. Name of the resulting DatetimeIndex.
  838. weekmask : str or None, default None
  839. Weekmask of valid business days, passed to ``numpy.busdaycalendar``,
  840. only used when custom frequency strings are passed. The default
  841. value None is equivalent to 'Mon Tue Wed Thu Fri'.
  842. holidays : list-like or None, default None
  843. Dates to exclude from the set of valid business days, passed to
  844. ``numpy.busdaycalendar``, only used when custom frequency strings
  845. are passed.
  846. inclusive : {"both", "neither", "left", "right"}, default "both"
  847. Include boundaries; Whether to set each bound as closed or open.
  848. .. versionadded:: 1.4.0
  849. **kwargs
  850. For compatibility. Has no effect on the result.
  851. Returns
  852. -------
  853. DatetimeIndex
  854. Notes
  855. -----
  856. Of the four parameters: ``start``, ``end``, ``periods``, and ``freq``,
  857. exactly three must be specified. Specifying ``freq`` is a requirement
  858. for ``bdate_range``. Use ``date_range`` if specifying ``freq`` is not
  859. desired.
  860. To learn more about the frequency strings, please see `this link
  861. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  862. Examples
  863. --------
  864. Note how the two weekend days are skipped in the result.
  865. >>> pd.bdate_range(start='1/1/2018', end='1/08/2018')
  866. DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
  867. '2018-01-05', '2018-01-08'],
  868. dtype='datetime64[ns]', freq='B')
  869. """
  870. if freq is None:
  871. msg = "freq must be specified for bdate_range; use date_range instead"
  872. raise TypeError(msg)
  873. if isinstance(freq, str) and freq.startswith("C"):
  874. try:
  875. weekmask = weekmask or "Mon Tue Wed Thu Fri"
  876. freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask)
  877. except (KeyError, TypeError) as err:
  878. msg = f"invalid custom frequency string: {freq}"
  879. raise ValueError(msg) from err
  880. elif holidays or weekmask:
  881. msg = (
  882. "a custom frequency string is required when holidays or "
  883. f"weekmask are passed, got frequency {freq}"
  884. )
  885. raise ValueError(msg)
  886. return date_range(
  887. start=start,
  888. end=end,
  889. periods=periods,
  890. freq=freq,
  891. tz=tz,
  892. normalize=normalize,
  893. name=name,
  894. inclusive=inclusive,
  895. **kwargs,
  896. )
  897. def _time_to_micros(time_obj: dt.time) -> int:
  898. seconds = time_obj.hour * 60 * 60 + 60 * time_obj.minute + time_obj.second
  899. return 1_000_000 * seconds + time_obj.microsecond