123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547 |
- from __future__ import annotations
- from datetime import (
- datetime,
- timedelta,
- )
- from typing import Hashable
- import numpy as np
- from pandas._libs import index as libindex
- from pandas._libs.tslibs import (
- BaseOffset,
- NaT,
- Period,
- Resolution,
- Tick,
- )
- from pandas._typing import (
- Dtype,
- DtypeObj,
- npt,
- )
- from pandas.util._decorators import (
- cache_readonly,
- doc,
- )
- from pandas.core.dtypes.common import is_integer
- from pandas.core.dtypes.dtypes import PeriodDtype
- from pandas.core.dtypes.generic import ABCSeries
- from pandas.core.dtypes.missing import is_valid_na_for_dtype
- from pandas.core.arrays.period import (
- PeriodArray,
- period_array,
- raise_on_incompatible,
- validate_dtype_freq,
- )
- import pandas.core.common as com
- import pandas.core.indexes.base as ibase
- from pandas.core.indexes.base import maybe_extract_name
- from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
- from pandas.core.indexes.datetimes import (
- DatetimeIndex,
- Index,
- )
- from pandas.core.indexes.extension import inherit_names
# Work on a copy so the shared template kwargs in ``ibase`` are not mutated.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update({"target_klass": "PeriodIndex or list of Periods"})

# Substitutions for docstring templates shared with PeriodArray
# (consumed by ``@doc(PeriodArray.asfreq, ...)`` on ``PeriodIndex.asfreq``).
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}
# --- Period index sketch
def _new_PeriodIndex(cls, **d):
    """
    Rebuild a PeriodIndex-like object from its pickled keyword state.

    Parameters
    ----------
    cls : type
        Class to instantiate.
    **d
        Pickled state. Must contain ``"data"``; may contain ``"freq"`` and
        any other keywords accepted by ``cls`` / ``cls._simple_new``.

    Returns
    -------
    Instance produced by ``cls._simple_new`` (int64 data) or ``cls``
    (any other data).

    Raises
    ------
    KeyError
        If ``"data"`` is missing from the state.
    """
    # GH13277 for unpickling
    values = d.pop("data")
    if values.dtype == "int64":
        # int64 data is wrapped in a PeriodArray together with the stored
        # ``freq``; ``freq`` is only removed from the state on this path.
        freq = d.pop("freq", None)
        values = PeriodArray(values, freq=freq)
        return cls._simple_new(values, **d)
    # Anything else goes through the regular constructor, which receives the
    # remaining state (including ``freq`` if present) unchanged.
    return cls(values, **d)
@inherit_names(
    ["strftime", "start_time", "end_time"] + PeriodArray._field_ops,
    PeriodArray,
    wrap=True,
)
@inherit_names(["is_leap_year", "_format_native_types"], PeriodArray)
class PeriodIndex(DatetimeIndexOpsMixin):
    """
    Immutable ndarray holding ordinal values indicating regular periods in time.

    Index keys are boxed to Period objects which carry the metadata (e.g.,
    frequency information).

    Parameters
    ----------
    data : array-like (1d int np.ndarray or PeriodArray), optional
        Optional period-like data to construct index with.
    copy : bool
        Make a copy of input ndarray.
    freq : str or period object, optional
        One of pandas period strings or corresponding objects.
    year : int, array, or Series, default None
    month : int, array, or Series, default None
    quarter : int, array, or Series, default None
    day : int, array, or Series, default None
    hour : int, array, or Series, default None
    minute : int, array, or Series, default None
    second : int, array, or Series, default None
    dtype : str or PeriodDtype, default None

    Attributes
    ----------
    day
    dayofweek
    day_of_week
    dayofyear
    day_of_year
    days_in_month
    daysinmonth
    end_time
    freq
    freqstr
    hour
    is_leap_year
    minute
    month
    quarter
    qyear
    second
    start_time
    week
    weekday
    weekofyear
    year

    Methods
    -------
    asfreq
    strftime
    to_timestamp

    See Also
    --------
    Index : The base pandas Index type.
    Period : Represents a period of time.
    DatetimeIndex : Index with datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    period_range : Create a fixed-frequency PeriodIndex.

    Examples
    --------
    >>> idx = pd.PeriodIndex(year=[2000, 2002], quarter=[1, 3])
    >>> idx
    PeriodIndex(['2000Q1', '2002Q3'], dtype='period[Q-DEC]')
    """

    _typ = "periodindex"

    _data: PeriodArray
    freq: BaseOffset
    dtype: PeriodDtype

    _data_cls = PeriodArray
    _supports_partial_string_indexing = True

    @property
    def _engine_type(self) -> type[libindex.PeriodEngine]:
        # Lookup engine class used for this index type.
        return libindex.PeriodEngine

    @cache_readonly
    def _resolution_obj(self) -> Resolution:
        # for compat with DatetimeIndex
        return self.dtype._resolution_obj

    # --------------------------------------------------------------------
    # methods that dispatch to array and wrap result in Index
    # These are defined here instead of via inherit_names for mypy
    # (no docstrings below: ``@doc`` supplies them from PeriodArray)

    @doc(
        PeriodArray.asfreq,
        other="pandas.arrays.PeriodArray",
        other_name="PeriodArray",
        **_shared_doc_kwargs,
    )
    def asfreq(self, freq=None, how: str = "E") -> PeriodIndex:
        arr = self._data.asfreq(freq, how)
        # Re-wrap in the caller's own type, keeping the index name.
        return type(self)._simple_new(arr, name=self.name)

    @doc(PeriodArray.to_timestamp)
    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeIndex:
        arr = self._data.to_timestamp(freq, how)
        return DatetimeIndex._simple_new(arr, name=self.name)

    @property
    @doc(PeriodArray.hour.fget)
    def hour(self) -> Index:
        return Index(self._data.hour, name=self.name)

    @property
    @doc(PeriodArray.minute.fget)
    def minute(self) -> Index:
        return Index(self._data.minute, name=self.name)

    @property
    @doc(PeriodArray.second.fget)
    def second(self) -> Index:
        return Index(self._data.second, name=self.name)

    # ------------------------------------------------------------------------
    # Index Constructors

    def __new__(
        cls,
        data=None,
        ordinal=None,
        freq=None,
        dtype: Dtype | None = None,
        copy: bool = False,
        name: Hashable | None = None,
        **fields,
    ) -> PeriodIndex:
        """
        Construct a PeriodIndex from data, ordinals, or date fields.

        Exactly one construction path is taken:

        * ``data`` and ``ordinal`` both None: build from ``fields``
          (``year``, ``month``, ...); raises if no fields were given.
        * ``data`` is None and ``ordinal`` given: wrap the int64 ordinals.
        * otherwise: convert ``data`` with ``period_array`` (``ordinal`` is
          ignored when ``data`` is passed).

        Raises
        ------
        TypeError
            If ``fields`` contains a keyword that is not a supported
            date field.
        """
        valid_field_set = {
            "year",
            "month",
            "day",
            "quarter",
            "hour",
            "minute",
            "second",
        }

        # Only carry over the source's references when no copy is requested.
        refs = None
        if not copy and isinstance(data, (Index, ABCSeries)):
            refs = data._references

        # Sort so the reported keyword is deterministic when several are
        # invalid (set iteration order is not).
        unexpected = sorted(set(fields) - valid_field_set)
        if unexpected:
            argument = unexpected[0]
            raise TypeError(f"__new__() got an unexpected keyword argument {argument}")

        name = maybe_extract_name(name, data, cls)

        if data is None and ordinal is None:
            # range-based.
            if not fields:
                # test_pickle_compat_construction
                cls._raise_scalar_data_error(None)

            data, freq2 = PeriodArray._generate_range(None, None, None, freq, fields)
            # PeriodArray._generate range does validation that fields is
            # empty when really using the range-based constructor.
            freq = freq2

            data = PeriodArray(data, freq=freq)
        else:
            freq = validate_dtype_freq(dtype, freq)

            # PeriodIndex allow PeriodIndex(period_index, freq=different)
            # Let's not encourage that kind of behavior in PeriodArray.

            if freq and isinstance(data, cls) and data.freq != freq:
                # TODO: We can do some of these with no-copy / coercion?
                # e.g. D -> 2D seems to be OK
                data = data.asfreq(freq)

            if data is None and ordinal is not None:
                # we strangely ignore `ordinal` if data is passed.
                ordinal = np.asarray(ordinal, dtype=np.int64)
                data = PeriodArray(ordinal, freq=freq)
            else:
                # don't pass copy here, since we copy later.
                data = period_array(data=data, freq=freq)

        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name, refs=refs)

    # ------------------------------------------------------------------------
    # Data

    @property
    def values(self) -> np.ndarray:
        """
        Return the index contents as an object-dtype ndarray.
        """
        return np.asarray(self, dtype=object)

    def _maybe_convert_timedelta(self, other) -> int | npt.NDArray[np.int64]:
        """
        Convert timedelta-like input to an integer multiple of self.freq

        Parameters
        ----------
        other : timedelta, np.timedelta64, DateOffset, int, np.ndarray

        Returns
        -------
        converted : int, np.ndarray[int64]

        Raises
        ------
        IncompatibleFrequency : if the input cannot be written as a multiple
            of self.freq.  Note IncompatibleFrequency subclasses ValueError.
        """
        if isinstance(other, (timedelta, np.timedelta64, Tick, np.ndarray)):
            if isinstance(self.freq, Tick):
                # _check_timedeltalike_freq_compat will raise if incompatible
                delta = self._data._check_timedeltalike_freq_compat(other)
                return delta
            # Non-Tick freq: fall through to the final raise below.
        elif isinstance(other, BaseOffset):
            if other.base == self.freq.base:
                return other.n

            raise raise_on_incompatible(self, other)
        elif is_integer(other):
            # integer is passed to .shift via
            # _add_datetimelike_methods basically
            # but ufunc may pass integer to _add_delta
            return other

        # raise when input doesn't have freq
        raise raise_on_incompatible(self, None)

    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        """
        Can we compare values of the given dtype to our own?
        """
        if not isinstance(dtype, PeriodDtype):
            return False
        # For the subset of DateOffsets that can be a dtype.freq, it
        # suffices (and is much faster) to compare the dtype_code rather than
        # the freq itself.
        # See also: PeriodDtype.__eq__
        freq = dtype.freq
        own_freq = self.freq
        return (
            freq._period_dtype_code
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == own_freq._period_dtype_code  # type: ignore[attr-defined]
            and freq.n == own_freq.n
        )

    # ------------------------------------------------------------------------
    # Index Methods

    def asof_locs(self, where: Index, mask: npt.NDArray[np.bool_]) -> np.ndarray:
        """
        Delegate to the parent ``asof_locs`` after coercing ``where``.

        Parameters
        ----------
        where : DatetimeIndex or PeriodIndex
            Array of timestamps. A DatetimeIndex is converted to a
            PeriodIndex with this index's ``freq`` first.
        mask : np.ndarray[bool]
            Array of booleans where data is not NA.

        Raises
        ------
        TypeError
            If ``where`` is neither a DatetimeIndex nor a PeriodIndex.
        """
        if isinstance(where, DatetimeIndex):
            where = PeriodIndex(where._values, freq=self.freq)
        elif not isinstance(where, PeriodIndex):
            raise TypeError("asof_locs `where` must be DatetimeIndex or PeriodIndex")

        return super().asof_locs(where, mask)

    @property
    def is_full(self) -> bool:
        """
        Returns True if this PeriodIndex is range-like in that all Periods
        between start and end are present, in order.

        An empty index is considered full.

        Raises
        ------
        ValueError
            If the index is not monotonic increasing.
        """
        if len(self) == 0:
            return True
        if not self.is_monotonic_increasing:
            raise ValueError("Index is not monotonic")
        values = self.asi8
        # Consecutive ordinals may differ by at most 1; a difference of 0
        # (repeated period) also passes this check.
        return bool(((values[1:] - values[:-1]) < 2).all())

    @property
    def inferred_type(self) -> str:
        # b/c data is represented as ints make sure we can't have ambiguous
        # indexing
        return "period"

    # ------------------------------------------------------------------------
    # Indexing Methods

    def _convert_tolerance(self, tolerance, target):
        # Returned tolerance must be in dtype/units so that
        #  `|self._get_engine_target() - target._engine_target()| <= tolerance`
        #  is meaningful.  Since PeriodIndex returns int64 for engine_target,
        #  we may need to convert timedelta64 tolerance to int64.
        tolerance = super()._convert_tolerance(tolerance, target)

        if self.dtype == target.dtype:
            # convert tolerance to i8
            tolerance = self._maybe_convert_timedelta(tolerance)

        return tolerance

    def get_loc(self, key):
        """
        Get integer location for requested label.

        Parameters
        ----------
        key : Period, NaT, str, or datetime
            String or datetime key must be parsable as Period.

        Returns
        -------
        loc : int or ndarray[int64]

        Raises
        ------
        KeyError
            Key is not present in the index.
        TypeError
            If key is listlike or otherwise not hashable.
        """
        # Keep the caller's key so a final KeyError reports what was asked
        # for, not the Period it was cast to.
        orig_key = key

        self._check_indexing_error(key)

        if is_valid_na_for_dtype(key, self.dtype):
            key = NaT

        elif isinstance(key, str):
            try:
                parsed, reso = self._parse_with_reso(key)
            except ValueError as err:
                # A string with invalid format
                raise KeyError(f"Cannot interpret '{key}' as period") from err

            if self._can_partial_date_slice(reso):
                try:
                    return self._partial_date_slice(reso, parsed)
                except KeyError as err:
                    raise KeyError(key) from err

            if reso == self._resolution_obj:
                # the reso < self._resolution_obj case goes
                #  through _get_string_slice
                key = self._cast_partial_indexing_scalar(parsed)
            else:
                raise KeyError(key)

        elif isinstance(key, Period):
            self._disallow_mismatched_indexing(key)

        elif isinstance(key, datetime):
            key = self._cast_partial_indexing_scalar(key)

        else:
            # in particular integer, which Period constructor would cast to string
            raise KeyError(key)

        try:
            return Index.get_loc(self, key)
        except KeyError as err:
            raise KeyError(orig_key) from err

    def _disallow_mismatched_indexing(self, key: Period) -> None:
        """
        Raise KeyError if ``key``'s freq does not match this index's freq.
        """
        sfreq = self.freq
        kfreq = key.freq
        if not (
            sfreq.n == kfreq.n
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            and sfreq._period_dtype_code  # type: ignore[attr-defined]
            # error: "BaseOffset" has no attribute "_period_dtype_code"
            == kfreq._period_dtype_code  # type: ignore[attr-defined]
        ):
            # GH#42247 For the subset of DateOffsets that can be Period freqs,
            #  checking these two attributes is sufficient to check equality,
            #  and much more performant than `self.freq == key.freq`
            raise KeyError(key)

    def _cast_partial_indexing_scalar(self, label: datetime) -> Period:
        """
        Convert ``label`` to a Period with this index's freq.

        Raises
        ------
        KeyError
            If a Period cannot be constructed from ``label``.
        """
        try:
            period = Period(label, freq=self.freq)
        except ValueError as err:
            # we cannot construct the Period
            raise KeyError(label) from err
        return period

    @doc(DatetimeIndexOpsMixin._maybe_cast_slice_bound)
    def _maybe_cast_slice_bound(self, label, side: str):
        # datetime labels are cast to Period before the parent handles them.
        if isinstance(label, datetime):
            label = self._cast_partial_indexing_scalar(label)

        return super()._maybe_cast_slice_bound(label, side)

    def _parsed_string_to_bounds(self, reso: Resolution, parsed: datetime):
        """
        Return the (start, end) Periods, at this index's freq, spanned by
        ``parsed`` interpreted at resolution ``reso``.
        """
        iv = Period(parsed, freq=reso.attr_abbrev)
        return (iv.asfreq(self.freq, how="start"), iv.asfreq(self.freq, how="end"))

    @doc(DatetimeIndexOpsMixin.shift)
    def shift(self, periods: int = 1, freq=None):
        # ``freq`` exists only for signature compatibility; any value other
        # than None is rejected.
        if freq is not None:
            raise TypeError(
                f"`freq` argument is not supported for {type(self).__name__}.shift"
            )
        return self + periods
def period_range(
    start=None, end=None, periods: int | None = None, freq=None, name=None
) -> PeriodIndex:
    """
    Return a fixed frequency PeriodIndex.

    The day (calendar) is the default frequency.

    Parameters
    ----------
    start : str or period-like, default None
        Left bound for generating periods.
    end : str or period-like, default None
        Right bound for generating periods.
    periods : int, default None
        Number of periods to generate.
    freq : str or DateOffset, optional
        Frequency alias. By default the freq is taken from `start` or `end`
        if those are Period objects. Otherwise, the default is ``"D"`` for
        daily frequency.
    name : str, default None
        Name of the resulting PeriodIndex.

    Returns
    -------
    PeriodIndex

    Raises
    ------
    ValueError
        If the number of non-None values among ``start``, ``end`` and
        ``periods`` is not exactly two.

    Notes
    -----
    Of the three parameters: ``start``, ``end``, and ``periods``, exactly two
    must be specified.

    To learn more about the frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    >>> pd.period_range(start='2017-01-01', end='2018-01-01', freq='M')
    PeriodIndex(['2017-01', '2017-02', '2017-03', '2017-04', '2017-05', '2017-06',
             '2017-07', '2017-08', '2017-09', '2017-10', '2017-11', '2017-12',
             '2018-01'],
            dtype='period[M]')

    If ``start`` or ``end`` are ``Period`` objects, they will be used as anchor
    endpoints for a ``PeriodIndex`` with frequency matching that of the
    ``period_range`` constructor.

    >>> pd.period_range(start=pd.Period('2017Q1', freq='Q'),
    ...                 end=pd.Period('2017Q2', freq='Q'), freq='M')
    PeriodIndex(['2017-03', '2017-04', '2017-05', '2017-06'],
                dtype='period[M]')
    """
    if com.count_not_none(start, end, periods) != 2:
        raise ValueError(
            "Of the three parameters: start, end, and periods, "
            "exactly two must be specified"
        )

    # Fall back to daily frequency only when neither endpoint is a Period
    # that could supply one.
    if freq is None and (not isinstance(start, Period) and not isinstance(end, Period)):
        freq = "D"

    # ``fields={}``: no date-field keywords are used on this path.
    data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={})
    data = PeriodArray(data, freq=freq)
    return PeriodIndex(data, name=name)
|