# period.py (17 KB)
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. from typing import Hashable
  7. import numpy as np
  8. from pandas._libs import index as libindex
  9. from pandas._libs.tslibs import (
  10. BaseOffset,
  11. NaT,
  12. Period,
  13. Resolution,
  14. Tick,
  15. )
  16. from pandas._typing import (
  17. Dtype,
  18. DtypeObj,
  19. npt,
  20. )
  21. from pandas.util._decorators import (
  22. cache_readonly,
  23. doc,
  24. )
  25. from pandas.core.dtypes.common import is_integer
  26. from pandas.core.dtypes.dtypes import PeriodDtype
  27. from pandas.core.dtypes.generic import ABCSeries
  28. from pandas.core.dtypes.missing import is_valid_na_for_dtype
  29. from pandas.core.arrays.period import (
  30. PeriodArray,
  31. period_array,
  32. raise_on_incompatible,
  33. validate_dtype_freq,
  34. )
  35. import pandas.core.common as com
  36. import pandas.core.indexes.base as ibase
  37. from pandas.core.indexes.base import maybe_extract_name
  38. from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
  39. from pandas.core.indexes.datetimes import (
  40. DatetimeIndex,
  41. Index,
  42. )
  43. from pandas.core.indexes.extension import inherit_names
  44. _index_doc_kwargs = dict(ibase._index_doc_kwargs)
  45. _index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})
  46. _shared_doc_kwargs = {
  47. "klass": "PeriodArray",
  48. }
  49. # --- Period index sketch
  50. def _new_PeriodIndex(cls, **d):
  51. # GH13277 for unpickling
  52. values = d.pop("data")
  53. if values.dtype == "int64":
  54. freq = d.pop("freq", None)
  55. values = PeriodArray(values, freq=freq)
  56. return cls._simple_new(values, **d)
  57. else:
  58. return cls(values, **d)
  59. @inherit_names(
  60. ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
  61. PeriodArray,
  62. wrap=True,
  63. )
  64. @inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
  65. class PeriodIndex(DatetimeIndexOpsMixin):
  66. """
  67. Immutable ndarray holding ordinal values indicating regular periods in time.
  68. Index keys are boxed to Period objects which carries the metadata (eg,
  69. frequency information).
  70. Parameters
  71. ----------
  72. data : array-like (1d int np.ndarray or PeriodArray), optional
  73. Optional period-like data to construct index with.
  74. copy : bool
  75. Make a copy of input ndarray.
  76. freq : str or period object, optional
  77. One of pandas period strings or corresponding objects.
  78. year : int, array, or Series, default None
  79. month : int, array, or Series, default None
  80. quarter : int, array, or Series, default None
  81. day : int, array, or Series, default None
  82. hour : int, array, or Series, default None
  83. minute : int, array, or Series, default None
  84. second : int, array, or Series, default None
  85. dtype : str or PeriodDtype, default None
  86. Attributes
  87. ----------
  88. day
  89. dayofweek
  90. day_of_week
  91. dayofyear
  92. day_of_year
  93. days_in_month
  94. daysinmonth
  95. end_time
  96. freq
  97. freqstr
  98. hour
  99. is_leap_year
  100. minute
  101. month
  102. quarter
  103. qyear
  104. second
  105. start_time
  106. week
  107. weekday
  108. weekofyear
  109. year
  110. Methods
  111. -------
  112. asfreq
  113. strftime
  114. to_timestamp
  115. See Also
  116. --------
  117. Index : The base pandas Index type.
  118. Period : Represents a period of time.
  119. DatetimeIndex : Index with datetime64 data.
  120. TimedeltaIndex : Index of timedelta64 data.
  121. period_range : Create a fixed-frequency PeriodIndex.
  122. Examples
  123. --------
  124. >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
  125. >>> idx
  126. PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
  127. """
  128. _typ = "periodindex"
  129. _data: PeriodArray
  130. freq: BaseOffset
  131. dtype: PeriodDtype
  132. _data_cls = PeriodArray
  133. _supports_partial_string_indexing = True
  134. @property
  135. def _engine_type(self) -> type[libindex.PeriodEngine]:
  136. return libindex.PeriodEngine
  137. @cache_readonly
  138. def _resolution_obj(self) -> Resolution:
  139. # for compat with DatetimeIndex
  140. return self.dtype._resolution_obj
  141. # --------------------------------------------------------------------
  142. # methods that dispatch to array and wrap result in Index
  143. # These are defined here instead of via inherit_names for mypy
  144. @doc(
  145. PeriodArray.asfreq,
  146. other="pandas.arrays.PeriodArray",
  147. other_name="PeriodArray",
  148. **_shared_doc_kwargs,
  149. )
  150. def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
  151. arr = self._data.asfreq(freq, how)
  152. return type(self)._simple_new(arr, name=self.name)
  153. @doc(PeriodArray.to_timestamp)
  154. def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
  155. arr = self._data.to_timestamp(freq, how)
  156. return DatetimeIndex._simple_new(arr, name=self.name)
  157. @property
  158. @doc(PeriodArray.hour.fget)
  159. def hour(self) -> Index:
  160. return Index(self._data.hour, name=self.name)
  161. @property
  162. @doc(PeriodArray.minute.fget)
  163. def minute(self) -> Index:
  164. return Index(self._data.minute, name=self.name)
  165. @property
  166. @doc(PeriodArray.second.fget)
  167. def second(self) -> Index:
  168. return Index(self._data.second, name=self.name)
  169. # ------------------------------------------------------------------------
  170. # Index Constructors
  171. def __new__(
  172. cls,
  173. data=None,
  174. ordinal=None,
  175. freq=None,
  176. dtype: Dtype | None = None,
  177. copy: bool = False,
  178. name: Hashable = None,
  179. **fields,
  180. ) -> PeriodIndex:
  181. valid_field_set = {
  182. "year",
  183. "month",
  184. "day",
  185. "quarter",
  186. "hour",
  187. "minute",
  188. "second",
  189. }
  190. refs = None
  191. if not copy and isinstance(data, (Index, ABCSeries)):
  192. refs = data._references
  193. if not set(fields).issubset(valid_field_set):
  194. argument = list(set(fields) - valid_field_set)[0]
  195. raise TypeError(f"__new__() got an unexpected keyword argument {argument}")
  196. name = maybe_extract_name(name, data, cls)
  197. if data is None and ordinal is None:
  198. # range-based.
  199. if not fields:
  200. # test_pickle_compat_construction
  201. cls._raise_scalar_data_error(None)
  202. data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
  203. # PeriodArray._generate range does validation that fields is
  204. # empty when really using the range-based constructor.
  205. freq = freq2
  206. data = PeriodArray(data, freq=freq)
  207. else:
  208. freq = validate_dtype_freq(dtype, freq)
  209. # PeriodIndex allow PeriodIndex(period_index, freq=different)
  210. # Let's not encourage that kind of behavior in PeriodArray.
  211. if freq and isinstance(data, cls) and data.freq != freq:
  212. # TODO: We can do some of these with no-copy / coercion?
  213. # e.g. D -> 2D seems to be OK
  214. data = data.asfreq(freq)
  215. if data is None and ordinal is not None:
  216. # we strangely ignore `ordinal` if data is passed.
  217. ordinal = np.asarray(ordinal, dtype=np.int64)
  218. data = PeriodArray(ordinal, freq=freq)
  219. else:
  220. # don't pass copy here, since we copy later.
  221. data = period_array(data=data, freq=freq)
  222. if copy:
  223. data = data.copy()
  224. return cls._simple_new(data, name=name, refs=refs)
  225. # ------------------------------------------------------------------------
  226. # Data
  227. @property
  228. def values(self) -> np.ndarray:
  229. return np.asarray(self, dtype=object)
  230. def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
  231. """
  232. Convert timedelta-like input to an integer multiple of self.freq
  233. Parameters
  234. ----------
  235. other : timedelta, np.timedelta64, DateOffset, int, np.ndarray
  236. Returns
  237. -------
  238. converted : int, np.ndarray[int64]
  239. Raises
  240. ------
  241. IncompatibleFrequency : if the input cannot be written as a multiple
  242. of self.freq. Note IncompatibleFrequency subclasses ValueError.
  243. """
  244. if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
  245. if isinstance(self.freq, Tick):
  246. # _check_timedeltalike_freq_compat will raise if incompatible
  247. delta = self._data._check_timedeltalike_freq_compat(other)
  248. return delta
  249. elif isinstance(other, BaseOffset):
  250. if other.base == self.freq.base:
  251. return other.n
  252. raise raise_on_incompatible(self, other)
  253. elif is_integer(other):
  254. # integer is passed to .shift via
  255. # _add_datetimelike_methods basically
  256. # but ufunc may pass integer to _add_delta
  257. return other
  258. # raise when input doesn't have freq
  259. raise raise_on_incompatible(self, None)
  260. def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
  261. """
  262. Can we compare values of the given dtype to our own?
  263. """
  264. if not isinstance(dtype, PeriodDtype):
  265. return False
  266. # For the subset of DateOffsets that can be a dtype.freq, it
  267. # suffices (and is much faster) to compare the dtype_code rather than
  268. # the freq itself.
  269. # See also: PeriodDtype.__eq__
  270. freq = dtype.freq
  271. own_freq = self.freq
  272. return (
  273. freq._period_dtype_code
  274. # error: "BaseOffset" has no attribute "_period_dtype_code"
  275. == own_freq._period_dtype_code # type: ignore[attr-defined]
  276. and freq.n == own_freq.n
  277. )
  278. # ------------------------------------------------------------------------
  279. # Index Methods
  280. def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
  281. """
  282. where : array of timestamps
  283. mask : np.ndarray[bool]
  284. Array of booleans where data is not NA.
  285. """
  286. if isinstance(where, DatetimeIndex):
  287. where = PeriodIndex(where._values, freq=self.freq)
  288. elif not isinstance(where, PeriodIndex):
  289. raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")
  290. return super().asof_locs(where, mask)
  291. @property
  292. def is_full(self) -> bool:
  293. """
  294. Returns True if this PeriodIndex is range-like in that all Periods
  295. between start and end are present, in order.
  296. """
  297. if len(self) == 0:
  298. return True
  299. if not self.is_monotonic_increasing:
  300. raise ValueError("Index is not monotonic")
  301. values = self.asi8
  302. return bool(((values[1:] - values[:-1]) < 2).all())
  303. @property
  304. def inferred_type(self) -> str:
  305. # b/c data is represented as ints make sure we can't have ambiguous
  306. # indexing
  307. return "period"
  308. # ------------------------------------------------------------------------
  309. # Indexing Methods
  310. def _convert_tolerance(self, tolerance, target):
  311. # Returned tolerance must be in dtype/units so that
  312. # `|self._get_engine_target() - target._engine_target()| <= tolerance`
  313. # is meaningful. Since PeriodIndex returns int64 for engine_target,
  314. # we may need to convert timedelta64 tolerance to int64.
  315. tolerance = super()._convert_tolerance(tolerance, target)
  316. if self.dtype == target.dtype:
  317. # convert tolerance to i8
  318. tolerance = self._maybe_convert_timedelta(tolerance)
  319. return tolerance
  320. def get_loc(self, key):
  321. """
  322. Get integer location for requested label.
  323. Parameters
  324. ----------
  325. key : Period, NaT, str, or datetime
  326. String or datetime key must be parsable as Period.
  327. Returns
  328. -------
  329. loc : int or ndarray[int64]
  330. Raises
  331. ------
  332. KeyError
  333. Key is not present in the index.
  334. TypeError
  335. If key is listlike or otherwise not hashable.
  336. """
  337. orig_key = key
  338. self._check_indexing_error(key)
  339. if is_valid_na_for_dtype(key, self.dtype):
  340. key = NaT
  341. elif isinstance(key, str):
  342. try:
  343. parsed, reso = self._parse_with_reso(key)
  344. except ValueError as err:
  345. # A string with invalid format
  346. raise KeyError(f"Cannot interpret '{key}' as period") from err
  347. if self._can_partial_date_slice(reso):
  348. try:
  349. return self._partial_date_slice(reso, parsed)
  350. except KeyError as err:
  351. raise KeyError(key) from err
  352. if reso == self._resolution_obj:
  353. # the reso < self._resolution_obj case goes
  354. # through _get_string_slice
  355. key = self._cast_partial_indexing_scalar(parsed)
  356. else:
  357. raise KeyError(key)
  358. elif isinstance(key, Period):
  359. self._disallow_mismatched_indexing(key)
  360. elif isinstance(key, datetime):
  361. key = self._cast_partial_indexing_scalar(key)
  362. else:
  363. # in particular integer, which Period constructor would cast to string
  364. raise KeyError(key)
  365. try:
  366. return Index.get_loc(self, key)
  367. except KeyError as err:
  368. raise KeyError(orig_key) from err
  369. def _disallow_mismatched_indexing(self, key: Period) -> None:
  370. sfreq = self.freq
  371. kfreq = key.freq
  372. if not (
  373. sfreq.n == kfreq.n
  374. # error: "BaseOffset" has no attribute "_period_dtype_code"
  375. and sfreq._period_dtype_code # type: ignore[attr-defined]
  376. # error: "BaseOffset" has no attribute "_period_dtype_code"
  377. == kfreq._period_dtype_code # type: ignore[attr-defined]
  378. ):
  379. # GH#42247 For the subset of DateOffsets that can be Period freqs,
  380. # checking these two attributes is sufficient to check equality,
  381. # and much more performant than `self.freq == key.freq`
  382. raise KeyError(key)
  383. def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
  384. try:
  385. period = Period(label, freq=self.freq)
  386. except ValueError as err:
  387. # we cannot construct the Period
  388. raise KeyError(label) from err
  389. return period
  390. @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
  391. def _maybe_cast_slice_bound(self, label, side: str):
  392. if isinstance(label, datetime):
  393. label = self._cast_partial_indexing_scalar(label)
  394. return super()._maybe_cast_slice_bound(label, side)
  395. def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
  396. iv = Period(parsed, freq=reso.attr_abbrev)
  397. return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))
  398. @doc(DatetimeIndexOpsMixin.shift)
  399. def shift(self, periods: int = 1, freq=None):
  400. if freq is not None:
  401. raise TypeError(
  402. f"`freq` argument is not supported for {type(self).__name__}.shift"
  403. )
  404. return self + periods
  405. def period_range(
  406. start=None, end=None, periods: int | None = None, freq=None, name=None
  407. ) -> PeriodIndex:
  408. """
  409. Return a fixed frequency PeriodIndex.
  410. The day (calendar) is the default frequency.
  411. Parameters
  412. ----------
  413. start : str or period-like, default None
  414. Left bound for generating periods.
  415. end : str or period-like, default None
  416. Right bound for generating periods.
  417. periods : int, default None
  418. Number of periods to generate.
  419. freq : str or DateOffset, optional
  420. Frequency alias. By default the freq is taken from `start` or `end`
  421. if those are Period objects. Otherwise, the default is ``"D"`` for
  422. daily frequency.
  423. name : str, default None
  424. Name of the resulting PeriodIndex.
  425. Returns
  426. -------
  427. PeriodIndex
  428. Notes
  429. -----
  430. Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
  431. must be specified.
  432. To learn more about the frequency strings, please see `this link
  433. <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.
  434. Examples
  435. --------
  436. >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
  437. PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
  438. '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
  439. '2018-01'],
  440. dtype='period[M]')
  441. If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
  442. endpoints for a ``PeriodIndex`` with frequency matching that of the
  443. ``period_range`` constructor.
  444. >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
  445. ... end=pd.Period('2017Q2', freq='Q'), freq='M')
  446. PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
  447. dtype='period[M]')
  448. """
  449. if com.count_not_none(start, end, periods) != 2:
  450. raise ValueError(
  451. "Of the three parameters: start, end, and periods, "
  452. "exactly two must be specified"
  453. )
  454. if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
  455. freq = "D"
  456. data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
  457. data = PeriodArray(data, freq=freq)
  458. return PeriodIndex(data, name=name)