12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148 |
- from __future__ import annotations
- from datetime import timedelta
- import operator
- from typing import (
- TYPE_CHECKING,
- Any,
- Callable,
- Literal,
- Sequence,
- TypeVar,
- overload,
- )
- import numpy as np
- from pandas._libs import (
- algos as libalgos,
- lib,
- )
- from pandas._libs.arrays import NDArrayBacked
- from pandas._libs.tslibs import (
- BaseOffset,
- NaT,
- NaTType,
- Timedelta,
- astype_overflowsafe,
- dt64arr_to_periodarr as c_dt64arr_to_periodarr,
- get_unit_from_dtype,
- iNaT,
- parsing,
- period as libperiod,
- to_offset,
- )
- from pandas._libs.tslibs.dtypes import FreqGroup
- from pandas._libs.tslibs.fields import isleapyear_arr
- from pandas._libs.tslibs.offsets import (
- Tick,
- delta_to_tick,
- )
- from pandas._libs.tslibs.period import (
- DIFFERENT_FREQ,
- IncompatibleFrequency,
- Period,
- get_period_field_arr,
- period_asfreq_arr,
- )
- from pandas._typing import (
- AnyArrayLike,
- Dtype,
- NpDtype,
- npt,
- )
- from pandas.util._decorators import (
- cache_readonly,
- doc,
- )
- from pandas.core.dtypes.common import (
- ensure_object,
- is_datetime64_any_dtype,
- is_datetime64_dtype,
- is_dtype_equal,
- is_float_dtype,
- is_integer_dtype,
- is_period_dtype,
- pandas_dtype,
- )
- from pandas.core.dtypes.dtypes import PeriodDtype
- from pandas.core.dtypes.generic import (
- ABCIndex,
- ABCPeriodIndex,
- ABCSeries,
- ABCTimedeltaArray,
- )
- from pandas.core.dtypes.missing import isna
- import pandas.core.algorithms as algos
- from pandas.core.arrays import datetimelike as dtl
- import pandas.core.common as com
- if TYPE_CHECKING:
- from pandas._typing import (
- NumpySorter,
- NumpyValueArrayLike,
- )
- from pandas.core.arrays import (
- DatetimeArray,
- TimedeltaArray,
- )
- from pandas.core.arrays.base import ExtensionArray
- BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)
- _shared_doc_kwargs = {
- "klass": "PeriodArray",
- }
- def _field_accessor(name: str, docstring=None):
- def f(self):
- base = self.freq._period_dtype_code
- result = get_period_field_arr(name, self.asi8, base)
- return result
- f.__name__ = name
- f.__doc__ = docstring
- return property(f)
class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):
    """
    Pandas ExtensionArray for storing Period data.

    Users should use :func:`~pandas.period_array` to create new instances.
    Alternatively, :func:`~pandas.array` can be used to create new instances
    from a sequence of Period scalars.

    Parameters
    ----------
    values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
        The data to store. These should be arrays that can be directly
        converted to ordinals without inference or copy (PeriodArray,
        ndarray[int64]), or a box around such an array (Series[period],
        PeriodIndex).
    dtype : PeriodDtype, optional
        A PeriodDtype instance from which to extract a `freq`. If both
        `freq` and `dtype` are specified, then the frequencies must match.
    freq : str or DateOffset
        The `freq` to use for the array. Mostly applicable when `values`
        is an ndarray of integers, when `freq` is required. When `values`
        is a PeriodArray (or box around), it's checked that ``values.freq``
        matches `freq`.
    copy : bool, default False
        Whether to copy the ordinals before storing.

    Attributes
    ----------
    None

    Methods
    -------
    None

    See Also
    --------
    Period: Represents a period of time.
    PeriodIndex : Immutable Index for period data.
    period_range: Create a fixed-frequency PeriodArray.
    array: Construct a pandas array.

    Notes
    -----
    There are two components to a PeriodArray

    - ordinals : integer ndarray
    - freq : pd.tseries.offsets.Offset

    The values are physically stored as a 1-D ndarray of integers. These are
    called "ordinals" and represent some kind of offset from a base.

    The `freq` indicates the span covered by each element of the array.
    All elements in the PeriodArray have the same `freq`.
    """

    # array priority higher than numpy scalars
    __array_priority__ = 1000
    _typ = "periodarray"  # ABCPeriodArray
    _internal_fill_value = np.int64(iNaT)
    _recognized_scalars = (Period,)
    _is_recognized_dtype = is_period_dtype  # check_compatible_with checks freq match
    _infer_matches = ("period",)

    @property
    def _scalar_type(self) -> type[Period]:
        """Scalar class whose instances this array boxes to."""
        return Period

    # Names others delegate to us
    _other_ops: list[str] = []
    _bool_ops: list[str] = ["is_leap_year"]
    _object_ops: list[str] = ["start_time", "end_time", "freq"]
    _field_ops: list[str] = [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "weekofyear",
        "weekday",
        "week",
        "dayofweek",
        "day_of_week",
        "dayofyear",
        "day_of_year",
        "quarter",
        "qyear",
        "days_in_month",
        "daysinmonth",
    ]
    _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
    _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]

    _dtype: PeriodDtype

    # --------------------------------------------------------------------
    # Constructors

    def __init__(
        self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
    ) -> None:
        # Reconcile ``dtype`` and ``freq``; raises if both are given and differ.
        freq = validate_dtype_freq(dtype, freq)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        # Unwrap Series / PeriodIndex boxes down to the underlying array.
        if isinstance(values, ABCSeries):
            values = values._values
            if not isinstance(values, type(self)):
                raise TypeError("Incorrect dtype")

        elif isinstance(values, ABCPeriodIndex):
            values = values._values

        if isinstance(values, type(self)):
            if freq is not None and freq != values.freq:
                raise raise_on_incompatible(values, freq)
            values, freq = values._ndarray, values.freq

        values = np.array(values, dtype="int64", copy=copy)
        if freq is None:
            raise ValueError("freq is not specified and cannot be inferred")
        NDArrayBacked.__init__(self, values, PeriodDtype(freq))

    # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
    @classmethod
    def _simple_new(  # type: ignore[override]
        cls,
        values: np.ndarray,
        freq: BaseOffset | None = None,
        dtype: Dtype | None = None,
    ) -> PeriodArray:
        """
        Alias for ``PeriodArray.__init__`` that first asserts ``values`` is an
        int64 ndarray.
        """
        assertion_msg = "Should be numpy array of type i8"
        assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
        return cls(values, freq=freq, dtype=dtype)

    @classmethod
    def _from_sequence(
        cls: type[PeriodArray],
        scalars: Sequence[Period | None] | AnyArrayLike,
        *,
        dtype: Dtype | None = None,
        copy: bool = False,
    ) -> PeriodArray:
        """
        Construct a PeriodArray from a sequence of Period-like scalars.

        Parameters
        ----------
        scalars : sequence of Period or None, or array-like
        dtype : dtype specification, optional
            When it resolves to a PeriodDtype its ``freq`` is used; otherwise
            the frequency is extracted from ``scalars``.
        copy : bool, default False
            Only consulted when ``scalars`` is already an instance of ``cls``.

        Returns
        -------
        PeriodArray
        """
        # Normalise string specs such as "period[D]" so they are honoured
        # rather than silently ignored by the isinstance check below.
        if dtype is not None:
            dtype = pandas_dtype(dtype)
        if isinstance(dtype, PeriodDtype):
            freq = dtype.freq
        else:
            freq = None

        if isinstance(scalars, cls):
            validate_dtype_freq(scalars.dtype, freq)
            if copy:
                scalars = scalars.copy()
            return scalars

        periods = np.asarray(scalars, dtype=object)

        freq = freq or libperiod.extract_freq(periods)
        ordinals = libperiod.extract_ordinals(periods, freq)
        return cls(ordinals, freq=freq)

    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> PeriodArray:
        """Construct from strings by delegating to ``_from_sequence``."""
        return cls._from_sequence(strings, dtype=dtype, copy=copy)

    @classmethod
    def _from_datetime64(cls, data, freq, tz=None) -> PeriodArray:
        """
        Construct a PeriodArray from a datetime64 array

        Parameters
        ----------
        data : ndarray[datetime64[ns], datetime64[ns, tz]]
        freq : str or Tick
        tz : tzinfo, optional

        Returns
        -------
        PeriodArray[freq]
        """
        data, freq = dt64arr_to_periodarr(data, freq, tz)
        return cls(data, freq=freq)

    @classmethod
    def _generate_range(cls, start, end, periods, freq, fields):
        """
        Compute ordinals and freq for a range given either endpoints or fields.

        Returns
        -------
        tuple
            ``(ordinals, freq)`` as produced by ``_get_ordinal_range`` or
            ``_range_from_fields``.

        Raises
        ------
        ValueError
            If both endpoints and fields are supplied, or neither is.
        """
        periods = dtl.validate_periods(periods)

        if freq is not None:
            freq = Period._maybe_convert_freq(freq)

        field_count = len(fields)
        if start is not None or end is not None:
            if field_count > 0:
                raise ValueError(
                    "Can either instantiate from fields or endpoints, but not both"
                )
            subarr, freq = _get_ordinal_range(start, end, periods, freq)
        elif field_count > 0:
            subarr, freq = _range_from_fields(freq=freq, **fields)
        else:
            raise ValueError("Not enough parameters to construct Period range")

        return subarr, freq

    # -----------------------------------------------------------------
    # DatetimeLike Interface

    # error: Argument 1 of "_unbox_scalar" is incompatible with supertype
    # "DatetimeLikeArrayMixin"; supertype defines the argument type as
    # "Union[Union[Period, Any, Timedelta], NaTType]"
    def _unbox_scalar(  # type: ignore[override]
        self,
        value: Period | NaTType,
    ) -> np.int64:
        """
        Convert a Period or NaT into the int64 stored for it.

        Raises
        ------
        ValueError
            If ``value`` is neither NaT nor an instance of ``_scalar_type``.
        """
        if value is NaT:
            # error: Item "Period" of "Union[Period, NaTType]" has no attribute
            # "_value"
            return np.int64(value._value)  # type: ignore[union-attr]
        elif isinstance(value, self._scalar_type):
            self._check_compatible_with(value)
            return np.int64(value.ordinal)
        else:
            raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

    def _scalar_from_string(self, value: str) -> Period:
        """Parse ``value`` into a Period using this array's freq."""
        return Period(value, freq=self.freq)

    def _check_compatible_with(self, other) -> None:
        """Require ``other`` to match our freq; NaT is always accepted."""
        if other is NaT:
            return
        self._require_matching_freq(other)

    # --------------------------------------------------------------------
    # Data / Attributes

    @cache_readonly
    def dtype(self) -> PeriodDtype:
        return self._dtype

    # error: Cannot override writeable attribute with read-only property
    @property  # type: ignore[override]
    def freq(self) -> BaseOffset:
        """
        Return the frequency object for this PeriodArray.
        """
        return self.dtype.freq

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """
        NumPy conversion: ``"i8"`` returns ``asi8``, ``bool`` returns the
        negated NaT mask, anything else goes through an object array.
        """
        if dtype == "i8":
            return self.asi8
        elif dtype == bool:
            return ~self._isnan

        # This will raise TypeError for non-object dtypes
        return np.array(list(self), dtype=object)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow

        from pandas.core.arrays.arrow.extension_types import ArrowPeriodType

        if type is not None:
            if pyarrow.types.is_integer(type):
                return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
            elif isinstance(type, ArrowPeriodType):
                # ensure we have the same freq
                if self.freqstr != type.freq:
                    raise TypeError(
                        "Not supported to convert PeriodArray to array with different "
                        f"'freq' ({self.freqstr} vs {type.freq})"
                    )
            else:
                raise TypeError(
                    f"Not supported to convert PeriodArray to '{type}' type"
                )

        period_type = ArrowPeriodType(self.freqstr)
        storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
        return pyarrow.ExtensionArray.from_storage(period_type, storage_array)

    # --------------------------------------------------------------------
    # Vectorized analogues of Period properties

    year = _field_accessor(
        "year",
        """
        The year of the period.
        """,
    )
    month = _field_accessor(
        "month",
        """
        The month as January=1, December=12.
        """,
    )
    day = _field_accessor(
        "day",
        """
        The days of the period.
        """,
    )
    hour = _field_accessor(
        "hour",
        """
        The hour of the period.
        """,
    )
    minute = _field_accessor(
        "minute",
        """
        The minute of the period.
        """,
    )
    second = _field_accessor(
        "second",
        """
        The second of the period.
        """,
    )
    weekofyear = _field_accessor(
        "week",
        """
        The week ordinal of the year.
        """,
    )
    week = weekofyear
    day_of_week = _field_accessor(
        "day_of_week",
        """
        The day of the week with Monday=0, Sunday=6.
        """,
    )
    dayofweek = day_of_week
    weekday = dayofweek
    dayofyear = day_of_year = _field_accessor(
        "day_of_year",
        """
        The ordinal day of the year.
        """,
    )
    quarter = _field_accessor(
        "quarter",
        """
        The quarter of the date.
        """,
    )
    qyear = _field_accessor("qyear")
    days_in_month = _field_accessor(
        "days_in_month",
        """
        The number of days in the month.
        """,
    )
    daysinmonth = days_in_month

    @property
    def is_leap_year(self) -> np.ndarray:
        """
        Logical indicating if the date belongs to a leap year.
        """
        return isleapyear_arr(np.asarray(self.year))

    def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
        """
        Cast to DatetimeArray/Index.

        Parameters
        ----------
        freq : str or DateOffset, optional
            Target frequency. The default is 'D' for week or longer,
            'S' otherwise.
        how : {'s', 'e', 'start', 'end'}
            Whether to use the start or end of the time period being converted.

        Returns
        -------
        DatetimeArray/Index
        """
        from pandas.core.arrays import DatetimeArray

        how = libperiod.validate_end_alias(how)

        end = how == "E"
        if end:
            # The end of a period is computed from a start timestamp and then
            # shifted, rather than converted directly.
            if freq == "B" or self.freq == "B":
                # roll forward to ensure we land on B date
                adjust = Timedelta(1, "D") - Timedelta(1, "ns")
                return self.to_timestamp(how="start") + adjust
            else:
                adjust = Timedelta(1, "ns")
                return (self + self.freq).to_timestamp(how="start") - adjust

        if freq is None:
            freq = self._dtype._get_to_timestamp_base()
            base = freq
        else:
            freq = Period._maybe_convert_freq(freq)
            base = freq._period_dtype_code

        new_parr = self.asfreq(freq, how=how)

        new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
        dta = DatetimeArray(new_data)

        if self.freq.name == "B":
            # See if we can retain BDay instead of Day in cases where
            #  len(self) is too small for infer_freq to distinguish between them
            diffs = libalgos.unique_deltas(self.asi8)
            if len(diffs) == 1:
                diff = diffs[0]
                if diff == self.freq.n:
                    dta._freq = self.freq
                elif diff == 1:
                    dta._freq = self.freq.base
                # TODO: other cases?
            return dta
        else:
            return dta._with_freq("infer")

    # --------------------------------------------------------------------

    def _box_func(self, x) -> Period | NaTType:
        """Box a single ordinal into a Period carrying this array's freq."""
        return Period._from_ordinal(ordinal=x, freq=self.freq)

    @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
    def asfreq(self, freq=None, how: str = "E") -> PeriodArray:
        """
        Convert the {klass} to the specified frequency `freq`.

        Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
        to each :class:`~pandas.Period` in this {klass}.

        Parameters
        ----------
        freq : str
            A frequency.
        how : str {{'E', 'S'}}, default 'E'
            Whether the elements should be aligned to the end
            or start within a period.

            * 'E', 'END', or 'FINISH' for end,
            * 'S', 'START', or 'BEGIN' for start.

            January 31st ('END') vs. January 1st ('START') for example.

        Returns
        -------
        {klass}
            The transformed {klass} with the new frequency.

        See Also
        --------
        {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
        Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.

        Examples
        --------
        >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
        >>> pidx
        PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
        dtype='period[A-DEC]')

        >>> pidx.asfreq('M')
        PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
        '2015-12'], dtype='period[M]')

        >>> pidx.asfreq('M', how='S')
        PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
        '2015-01'], dtype='period[M]')
        """
        how = libperiod.validate_end_alias(how)

        freq = Period._maybe_convert_freq(freq)

        base1 = self._dtype._dtype_code
        base2 = freq._period_dtype_code

        asi8 = self.asi8
        # self.freq.n can't be negative or 0
        end = how == "E"
        if end:
            ordinal = asi8 + self.freq.n - 1
        else:
            ordinal = asi8

        new_data = period_asfreq_arr(ordinal, base1, base2, end)

        # Restore the NaT sentinel at positions that were missing on input.
        if self._hasna:
            new_data[self._isnan] = iNaT

        return type(self)(new_data, freq=freq)

    # ------------------------------------------------------------------
    # Rendering Methods

    def _formatter(self, boxed: bool = False):
        """Return ``str`` when boxed, else a formatter that single-quotes."""
        if boxed:
            return str
        return "'{}'".format

    @dtl.ravel_compat
    def _format_native_types(
        self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        actually format my specific types
        """
        values = self.astype(object)

        # Create the formatter function
        if date_format:
            formatter = lambda per: per.strftime(date_format)
        else:
            # Uses `_Period.str` which in turn uses `format_period`
            formatter = str

        # Apply the formatter to all values in the array, possibly with a mask
        if self._hasna:
            mask = self._isnan
            values[mask] = na_rep
            imask = ~mask
            values[imask] = np.array([formatter(per) for per in values[imask]])
        else:
            values = np.array([formatter(per) for per in values])
        return values

    # ------------------------------------------------------------------

    def astype(self, dtype, copy: bool = True):
        """
        Cast to ``dtype``.

        An equal dtype returns ``self`` or a copy, a period dtype goes through
        ``asfreq``, a datetime64 dtype goes through ``to_timestamp``, and
        everything else is handled by the parent class.
        """
        # We handle Period[T] -> Period[U]
        # Our parent handles everything else.
        dtype = pandas_dtype(dtype)
        if is_dtype_equal(dtype, self._dtype):
            if not copy:
                return self
            else:
                return self.copy()
        if is_period_dtype(dtype):
            return self.asfreq(dtype.freq)

        if is_datetime64_any_dtype(dtype):
            # GH#45038 match PeriodIndex behavior.
            tz = getattr(dtype, "tz", None)
            return self.to_timestamp().tz_localize(tz)

        return super().astype(dtype, copy=copy)

    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter | None = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        """
        Find insertion indices for ``value`` by searching the stored integers
        viewed as ``M8[ns]``.
        """
        npvalue = self._validate_setitem_value(value).view("M8[ns]")

        # Cast to M8 to get datetime-like NaT placement,
        #  similar to dtl._period_dispatch
        m8arr = self._ndarray.view("M8[ns]")
        return m8arr.searchsorted(npvalue, side=side, sorter=sorter)

    def fillna(self, value=None, method=None, limit=None) -> PeriodArray:
        """
        Fill missing values; method-based filling is done on a datetime64 view
        and the result is viewed back as this dtype.
        """
        if method is not None:
            # view as dt64 so we get treated as timelike in core.missing,
            #  similar to dtl._period_dispatch
            dta = self.view("M8[ns]")
            result = dta.fillna(value=value, method=method, limit=limit)
            # error: Incompatible return value type (got "Union[ExtensionArray,
            # ndarray[Any, Any]]", expected "PeriodArray")
            return result.view(self.dtype)  # type: ignore[return-value]
        return super().fillna(value=value, method=method, limit=limit)

    # ------------------------------------------------------------------
    # Arithmetic Methods

    def _addsub_int_array_or_scalar(
        self, other: np.ndarray | int, op: Callable[[Any, Any], Any]
    ) -> PeriodArray:
        """
        Add or subtract array of integers.

        Parameters
        ----------
        other : np.ndarray[int64] or int
        op : {operator.add, operator.sub}

        Returns
        -------
        result : PeriodArray
        """
        assert op in [operator.add, operator.sub]
        # Subtraction is implemented as addition of the negated operand.
        if op is operator.sub:
            other = -other
        res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
        return type(self)(res_values, freq=self.freq)

    def _add_offset(self, other: BaseOffset):
        """Add a non-Tick offset by adding ``other.n`` to the ordinals."""
        assert not isinstance(other, Tick)

        self._require_matching_freq(other, base=True)
        return self._addsub_int_array_or_scalar(other.n, operator.add)

    # TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
    def _add_timedeltalike_scalar(self, other):
        """
        Parameters
        ----------
        other : timedelta, Tick, np.timedelta64

        Returns
        -------
        PeriodArray
        """
        if not isinstance(self.freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise raise_on_incompatible(self, other)

        if isna(other):
            # i.e. np.timedelta64("NaT")
            return super()._add_timedeltalike_scalar(other)

        td = np.asarray(Timedelta(other).asm8)
        return self._add_timedelta_arraylike(td)

    def _add_timedelta_arraylike(
        self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
    ) -> PeriodArray:
        """
        Parameters
        ----------
        other : TimedeltaArray or ndarray[timedelta64]

        Returns
        -------
        PeriodArray
        """
        freq = self.freq
        if not isinstance(freq, Tick):
            # We cannot add timedelta-like to non-tick PeriodArray
            raise TypeError(
                f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
            )

        dtype = np.dtype(f"m8[{freq._td64_unit}]")

        try:
            delta = astype_overflowsafe(
                np.asarray(other), dtype=dtype, copy=False, round_ok=False
            )
        except ValueError as err:
            # e.g. if we have minutes freq and try to add 30s
            # "Cannot losslessly convert units"
            raise IncompatibleFrequency(
                "Cannot add/subtract timedelta-like from PeriodArray that is "
                "not an integer multiple of the PeriodArray's freq."
            ) from err

        b_mask = np.isnat(delta)

        res_values = algos.checked_add_with_arr(
            self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
        )
        # A missing value on either side yields a missing result.
        np.putmask(res_values, self._isnan | b_mask, iNaT)
        return type(self)(res_values, freq=self.freq)

    def _check_timedeltalike_freq_compat(self, other):
        """
        Arithmetic operations with timedelta-like scalars or array `other`
        are only valid if `other` is an integer multiple of `self.freq`.
        If the operation is valid, find that integer multiple.  Otherwise,
        raise because the operation is invalid.

        Parameters
        ----------
        other : timedelta, np.timedelta64, Tick,
                ndarray[timedelta64], TimedeltaArray, TimedeltaIndex

        Returns
        -------
        multiple : int or ndarray[int64]

        Raises
        ------
        IncompatibleFrequency
        """
        assert isinstance(self.freq, Tick)  # checked by calling function

        dtype = np.dtype(f"m8[{self.freq._td64_unit}]")

        if isinstance(other, (timedelta, np.timedelta64, Tick)):
            td = np.asarray(Timedelta(other).asm8)
        else:
            td = np.asarray(other)

        try:
            delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False)
        except ValueError as err:
            raise raise_on_incompatible(self, other) from err

        delta = delta.view("i8")
        return lib.item_from_zerodim(delta)
def raise_on_incompatible(left, right):
    """
    Build (but do not raise) an IncompatibleFrequency with a uniform message.

    Parameters
    ----------
    left : PeriodArray
        Its class name and ``freqstr`` are interpolated into the message.
    right : None, DateOffset, Period, ndarray, or timedelta-like
        The operand whose frequency is reported as the "other" freq.

    Returns
    -------
    IncompatibleFrequency
        Exception to be raised by the caller.
    """
    # GH#24283 error message format depends on whether right is scalar
    if right is None or isinstance(right, (np.ndarray, ABCTimedeltaArray)):
        # Arrays (and a missing operand) have no single freq to report.
        other_freq = None
    elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)):
        other_freq = right.freqstr
    else:
        # Anything else is treated as timedelta-like and rendered as a tick.
        other_freq = delta_to_tick(Timedelta(right)).freqstr

    return IncompatibleFrequency(
        DIFFERENT_FREQ.format(
            cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq
        )
    )
- # -------------------------------------------------------------------
- # Constructor Helpers
def period_array(
    data: Sequence[Period | str | None] | AnyArrayLike,
    freq: str | Tick | None = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq``. Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` converts to a non-empty floating-point array.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    data_dtype = getattr(data, "dtype", None)

    if is_datetime64_dtype(data_dtype):
        return PeriodArray._from_datetime64(data, freq)
    if is_period_dtype(data_dtype):
        # Forward ``copy`` so the documented flag is honoured when wrapping
        # existing period data; the default (False) behaves as before.
        return PeriodArray(data, freq=freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
        data = list(data)

    arrdata = np.asarray(data)

    dtype: PeriodDtype | None
    if freq:
        dtype = PeriodDtype(freq)
    else:
        dtype = None

    if is_float_dtype(arrdata) and len(arrdata) > 0:
        raise TypeError("PeriodIndex does not allow floating point in construction")

    if is_integer_dtype(arrdata.dtype):
        arr = arrdata.astype(np.int64, copy=False)
        # error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
        # Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
        ordinals = libperiod.from_ordinals(arr, freq)  # type: ignore[arg-type]
        return PeriodArray(ordinals, dtype=dtype)

    data = ensure_object(arrdata)

    return PeriodArray._from_sequence(data, dtype=dtype)
@overload
def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
    ...


@overload
def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
    ...


def validate_dtype_freq(
    dtype, freq: BaseOffsetT | timedelta | str | None
) -> BaseOffsetT:
    """
    Reconcile a dtype and a freq into a single frequency.

    When both are supplied they must agree; when only `dtype` is supplied
    its frequency is returned; when only `freq` is supplied it is returned
    after conversion with ``to_offset``.

    Parameters
    ----------
    dtype : dtype
    freq : DateOffset or None

    Returns
    -------
    freq : DateOffset
        May be None when both `dtype` and `freq` are None.

    Raises
    ------
    ValueError : non-period dtype
    IncompatibleFrequency : mismatch between dtype and freq
    """
    if freq is not None:
        # error: Incompatible types in assignment (expression has type
        # "BaseOffset", variable has type "Union[BaseOffsetT, timedelta,
        # str, None]")
        freq = to_offset(freq)  # type: ignore[assignment]

    if dtype is None:
        # Nothing to cross-check against.
        # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
        # expected "BaseOffset")
        return freq  # type: ignore[return-value]

    dtype = pandas_dtype(dtype)
    if not is_period_dtype(dtype):
        raise ValueError("dtype must be PeriodDtype")
    if freq is None:
        return dtype.freq
    if freq != dtype.freq:
        raise IncompatibleFrequency("specified freq and dtype are different")
    # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
    # expected "BaseOffset")
    return freq  # type: ignore[return-value]
def dt64arr_to_periodarr(
    data, freq, tz=None
) -> tuple[npt.NDArray[np.int64], BaseOffset]:
    """
    Convert a datetime-like array to Period ordinals.

    Parameters
    ----------
    data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
    freq : Optional[Union[str, Tick]]
        Must match the `freq` on the `data` if `data` is a DatetimeIndex
        or Series.
    tz : Optional[tzinfo]

    Returns
    -------
    ordinals : ndarray[int64]
    freq : Tick
        The frequency extracted from the Series or DatetimeIndex if that's
        used.

    Raises
    ------
    ValueError
        If ``data.dtype`` is not a NumPy dtype of kind ``"M"``.
    """
    if not isinstance(data.dtype, np.dtype) or data.dtype.kind != "M":
        raise ValueError(f"Wrong dtype: {data.dtype}")

    if isinstance(data, (ABCIndex, ABCSeries)):
        if freq is None:
            # Fall back to the frequency carried by the box itself.
            freq = data.freq if isinstance(data, ABCIndex) else data.dt.freq
        data = data._values

    reso = get_unit_from_dtype(data.dtype)
    freq = Period._maybe_convert_freq(freq)
    ordinals = c_dt64arr_to_periodarr(
        data.view("i8"), freq._period_dtype_code, tz, reso=reso
    )
    return ordinals, freq
- def _get_ordinal_range(start, end, periods, freq, mult: int = 1):
- if com.count_not_none(start, end, periods) != 2:
- raise ValueError(
- "Of the three parameters: start, end, and periods, "
- "exactly two must be specified"
- )
- if freq is not None:
- freq = to_offset(freq)
- mult = freq.n
- if start is not None:
- start = Period(start, freq)
- if end is not None:
- end = Period(end, freq)
- is_start_per = isinstance(start, Period)
- is_end_per = isinstance(end, Period)
- if is_start_per and is_end_per and start.freq != end.freq:
- raise ValueError("start and end must have same freq")
- if start is NaT or end is NaT:
- raise ValueError("start and end must not be NaT")
- if freq is None:
- if is_start_per:
- freq = start.freq
- elif is_end_per:
- freq = end.freq
- else: # pragma: no cover
- raise ValueError("Could not infer freq from start/end")
- if periods is not None:
- periods = periods * mult
- if start is None:
- data = np.arange(
- end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64
- )
- else:
- data = np.arange(
- start.ordinal, start.ordinal + periods, mult, dtype=np.int64
- )
- else:
- data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
- return data, freq
def _range_from_fields(
    year=None,
    month=None,
    quarter=None,
    day=None,
    hour=None,
    minute=None,
    second=None,
    freq=None,
) -> tuple[np.ndarray, BaseOffset]:
    """
    Build period ordinals from separate date/time field values.

    Missing `hour`, `minute` and `second` default to 0 and a missing `day`
    defaults to 1. When `quarter` is given, only `year` and `quarter` are
    used and the frequency must be quarterly.

    Returns
    -------
    tuple
        ``(ordinals, freq)`` where ``ordinals`` is an int64 ndarray.

    Raises
    ------
    AssertionError
        If `quarter` is given together with a non-quarterly `freq`.
    """
    hour = 0 if hour is None else hour
    minute = 0 if minute is None else minute
    second = 0 if second is None else second
    day = 1 if day is None else day

    if quarter is not None:
        if freq is None:
            freq = to_offset("Q")
            base = FreqGroup.FR_QTR.value
        else:
            freq = to_offset(freq)
            base = libperiod.freq_to_dtype_code(freq)
            if base != FreqGroup.FR_QTR.value:
                raise AssertionError("base must equal FR_QTR")

        freqstr = freq.freqstr
        year, quarter = _make_field_arrays(year, quarter)
        ordinals = []
        for yr, qtr in zip(year, quarter):
            # Translate (year, quarter) into a calendar (year, month) pair.
            cal_year, cal_month = parsing.quarter_to_myear(yr, qtr, freqstr)
            ordinals.append(
                libperiod.period_ordinal(cal_year, cal_month, 1, 1, 1, 1, 0, 0, base)
            )
    else:
        freq = to_offset(freq)
        base = libperiod.freq_to_dtype_code(freq)
        arrays = _make_field_arrays(year, month, day, hour, minute, second)
        ordinals = [
            libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base)
            for y, mth, d, h, mn, s in zip(*arrays)
        ]

    return np.array(ordinals, dtype=np.int64), freq
- def _make_field_arrays(*fields) -> list[np.ndarray]:
- length = None
- for x in fields:
- if isinstance(x, (list, np.ndarray, ABCSeries)):
- if length is not None and len(x) != length:
- raise ValueError("Mismatched Period array lengths")
- if length is None:
- length = len(x)
- # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected
- # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int,
- # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]"
- return [
- np.asarray(x)
- if isinstance(x, (np.ndarray, list, ABCSeries))
- else np.repeat(x, length) # type: ignore[arg-type]
- for x in fields
- ]
|