period.py 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148
  1. from __future__ import annotations
  2. from datetime import timedelta
  3. import operator
  4. from typing import (
  5. TYPE_CHECKING,
  6. Any,
  7. Callable,
  8. Literal,
  9. Sequence,
  10. TypeVar,
  11. overload,
  12. )
  13. import numpy as np
  14. from pandas._libs import (
  15. algos as libalgos,
  16. lib,
  17. )
  18. from pandas._libs.arrays import NDArrayBacked
  19. from pandas._libs.tslibs import (
  20. BaseOffset,
  21. NaT,
  22. NaTType,
  23. Timedelta,
  24. astype_overflowsafe,
  25. dt64arr_to_periodarr as c_dt64arr_to_periodarr,
  26. get_unit_from_dtype,
  27. iNaT,
  28. parsing,
  29. period as libperiod,
  30. to_offset,
  31. )
  32. from pandas._libs.tslibs.dtypes import FreqGroup
  33. from pandas._libs.tslibs.fields import isleapyear_arr
  34. from pandas._libs.tslibs.offsets import (
  35. Tick,
  36. delta_to_tick,
  37. )
  38. from pandas._libs.tslibs.period import (
  39. DIFFERENT_FREQ,
  40. IncompatibleFrequency,
  41. Period,
  42. get_period_field_arr,
  43. period_asfreq_arr,
  44. )
  45. from pandas._typing import (
  46. AnyArrayLike,
  47. Dtype,
  48. NpDtype,
  49. npt,
  50. )
  51. from pandas.util._decorators import (
  52. cache_readonly,
  53. doc,
  54. )
  55. from pandas.core.dtypes.common import (
  56. ensure_object,
  57. is_datetime64_any_dtype,
  58. is_datetime64_dtype,
  59. is_dtype_equal,
  60. is_float_dtype,
  61. is_integer_dtype,
  62. is_period_dtype,
  63. pandas_dtype,
  64. )
  65. from pandas.core.dtypes.dtypes import PeriodDtype
  66. from pandas.core.dtypes.generic import (
  67. ABCIndex,
  68. ABCPeriodIndex,
  69. ABCSeries,
  70. ABCTimedeltaArray,
  71. )
  72. from pandas.core.dtypes.missing import isna
  73. import pandas.core.algorithms as algos
  74. from pandas.core.arrays import datetimelike as dtl
  75. import pandas.core.common as com
  76. if TYPE_CHECKING:
  77. from pandas._typing import (
  78. NumpySorter,
  79. NumpyValueArrayLike,
  80. )
  81. from pandas.core.arrays import (
  82. DatetimeArray,
  83. TimedeltaArray,
  84. )
  85. from pandas.core.arrays.base import ExtensionArray
# TypeVar bound to BaseOffset; the ``validate_dtype_freq`` overloads use it so
# that passing a concrete offset subclass is reflected in the return annotation.
BaseOffsetT = TypeVar("BaseOffsetT", bound=BaseOffset)


# Keyword substitutions unpacked into the ``@doc`` decorator on
# ``PeriodArray.asfreq`` to fill the ``{klass}`` placeholder in its docstring.
_shared_doc_kwargs = {
    "klass": "PeriodArray",
}
  90. def _field_accessor(name: str, docstring=None):
  91. def f(self):
  92. base = self.freq._period_dtype_code
  93. result = get_period_field_arr(name, self.asi8, base)
  94. return result
  95. f.__name__ = name
  96. f.__doc__ = docstring
  97. return property(f)
  98. class PeriodArray(dtl.DatelikeOps, libperiod.PeriodMixin):
  99. """
  100. Pandas ExtensionArray for storing Period data.
  101. Users should use :func:`~pandas.period_array` to create new instances.
  102. Alternatively, :func:`~pandas.array` can be used to create new instances
  103. from a sequence of Period scalars.
  104. Parameters
  105. ----------
  106. values : Union[PeriodArray, Series[period], ndarray[int], PeriodIndex]
  107. The data to store. These should be arrays that can be directly
  108. converted to ordinals without inference or copy (PeriodArray,
  109. ndarray[int64]), or a box around such an array (Series[period],
  110. PeriodIndex).
  111. dtype : PeriodDtype, optional
  112. A PeriodDtype instance from which to extract a `freq`. If both
  113. `freq` and `dtype` are specified, then the frequencies must match.
  114. freq : str or DateOffset
  115. The `freq` to use for the array. Mostly applicable when `values`
  116. is an ndarray of integers, when `freq` is required. When `values`
  117. is a PeriodArray (or box around), it's checked that ``values.freq``
  118. matches `freq`.
  119. copy : bool, default False
  120. Whether to copy the ordinals before storing.
  121. Attributes
  122. ----------
  123. None
  124. Methods
  125. -------
  126. None
  127. See Also
  128. --------
  129. Period: Represents a period of time.
  130. PeriodIndex : Immutable Index for period data.
  131. period_range: Create a fixed-frequency PeriodArray.
  132. array: Construct a pandas array.
  133. Notes
  134. -----
  135. There are two components to a PeriodArray
  136. - ordinals : integer ndarray
  137. - freq : pd.tseries.offsets.Offset
  138. The values are physically stored as a 1-D ndarray of integers. These are
  139. called "ordinals" and represent some kind of offset from a base.
  140. The `freq` indicates the span covered by each element of the array.
  141. All elements in the PeriodArray have the same `freq`.
  142. """
  143. # array priority higher than numpy scalars
  144. __array_priority__ = 1000
  145. _typ = "periodarray" # ABCPeriodArray
  146. _internal_fill_value = np.int64(iNaT)
  147. _recognized_scalars = (Period,)
  148. _is_recognized_dtype = is_period_dtype # check_compatible_with checks freq match
  149. _infer_matches = ("period",)
  150. @property
  151. def _scalar_type(self) -> type[Period]:
  152. return Period
  153. # Names others delegate to us
  154. _other_ops: list[str] = []
  155. _bool_ops: list[str] = ["is_leap_year"]
  156. _object_ops: list[str] = ["start_time", "end_time", "freq"]
  157. _field_ops: list[str] = [
  158. "year",
  159. "month",
  160. "day",
  161. "hour",
  162. "minute",
  163. "second",
  164. "weekofyear",
  165. "weekday",
  166. "week",
  167. "dayofweek",
  168. "day_of_week",
  169. "dayofyear",
  170. "day_of_year",
  171. "quarter",
  172. "qyear",
  173. "days_in_month",
  174. "daysinmonth",
  175. ]
  176. _datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops
  177. _datetimelike_methods: list[str] = ["strftime", "to_timestamp", "asfreq"]
  178. _dtype: PeriodDtype
  179. # --------------------------------------------------------------------
  180. # Constructors
  181. def __init__(
  182. self, values, dtype: Dtype | None = None, freq=None, copy: bool = False
  183. ) -> None:
  184. freq = validate_dtype_freq(dtype, freq)
  185. if freq is not None:
  186. freq = Period._maybe_convert_freq(freq)
  187. if isinstance(values, ABCSeries):
  188. values = values._values
  189. if not isinstance(values, type(self)):
  190. raise TypeError("Incorrect dtype")
  191. elif isinstance(values, ABCPeriodIndex):
  192. values = values._values
  193. if isinstance(values, type(self)):
  194. if freq is not None and freq != values.freq:
  195. raise raise_on_incompatible(values, freq)
  196. values, freq = values._ndarray, values.freq
  197. values = np.array(values, dtype="int64", copy=copy)
  198. if freq is None:
  199. raise ValueError("freq is not specified and cannot be inferred")
  200. NDArrayBacked.__init__(self, values, PeriodDtype(freq))
  201. # error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
  202. @classmethod
  203. def _simple_new( # type: ignore[override]
  204. cls,
  205. values: np.ndarray,
  206. freq: BaseOffset | None = None,
  207. dtype: Dtype | None = None,
  208. ) -> PeriodArray:
  209. # alias for PeriodArray.__init__
  210. assertion_msg = "Should be numpy array of type i8"
  211. assert isinstance(values, np.ndarray) and values.dtype == "i8", assertion_msg
  212. return cls(values, freq=freq, dtype=dtype)
  213. @classmethod
  214. def _from_sequence(
  215. cls: type[PeriodArray],
  216. scalars: Sequence[Period | None] | AnyArrayLike,
  217. *,
  218. dtype: Dtype | None = None,
  219. copy: bool = False,
  220. ) -> PeriodArray:
  221. if dtype and isinstance(dtype, PeriodDtype):
  222. freq = dtype.freq
  223. else:
  224. freq = None
  225. if isinstance(scalars, cls):
  226. validate_dtype_freq(scalars.dtype, freq)
  227. if copy:
  228. scalars = scalars.copy()
  229. return scalars
  230. periods = np.asarray(scalars, dtype=object)
  231. freq = freq or libperiod.extract_freq(periods)
  232. ordinals = libperiod.extract_ordinals(periods, freq)
  233. return cls(ordinals, freq=freq)
  234. @classmethod
  235. def _from_sequence_of_strings(
  236. cls, strings, *, dtype: Dtype | None = None, copy: bool = False
  237. ) -> PeriodArray:
  238. return cls._from_sequence(strings, dtype=dtype, copy=copy)
  239. @classmethod
  240. def _from_datetime64(cls, data, freq, tz=None) -> PeriodArray:
  241. """
  242. Construct a PeriodArray from a datetime64 array
  243. Parameters
  244. ----------
  245. data : ndarray[datetime64[ns], datetime64[ns, tz]]
  246. freq : str or Tick
  247. tz : tzinfo, optional
  248. Returns
  249. -------
  250. PeriodArray[freq]
  251. """
  252. data, freq = dt64arr_to_periodarr(data, freq, tz)
  253. return cls(data, freq=freq)
  254. @classmethod
  255. def _generate_range(cls, start, end, periods, freq, fields):
  256. periods = dtl.validate_periods(periods)
  257. if freq is not None:
  258. freq = Period._maybe_convert_freq(freq)
  259. field_count = len(fields)
  260. if start is not None or end is not None:
  261. if field_count > 0:
  262. raise ValueError(
  263. "Can either instantiate from fields or endpoints, but not both"
  264. )
  265. subarr, freq = _get_ordinal_range(start, end, periods, freq)
  266. elif field_count > 0:
  267. subarr, freq = _range_from_fields(freq=freq, **fields)
  268. else:
  269. raise ValueError("Not enough parameters to construct Period range")
  270. return subarr, freq
  271. # -----------------------------------------------------------------
  272. # DatetimeLike Interface
  273. # error: Argument 1 of "_unbox_scalar" is incompatible with supertype
  274. # "DatetimeLikeArrayMixin"; supertype defines the argument type as
  275. # "Union[Union[Period, Any, Timedelta], NaTType]"
  276. def _unbox_scalar( # type: ignore[override]
  277. self,
  278. value: Period | NaTType,
  279. ) -> np.int64:
  280. if value is NaT:
  281. # error: Item "Period" of "Union[Period, NaTType]" has no attribute "value"
  282. return np.int64(value._value) # type: ignore[union-attr]
  283. elif isinstance(value, self._scalar_type):
  284. self._check_compatible_with(value)
  285. return np.int64(value.ordinal)
  286. else:
  287. raise ValueError(f"'value' should be a Period. Got '{value}' instead.")
  288. def _scalar_from_string(self, value: str) -> Period:
  289. return Period(value, freq=self.freq)
  290. def _check_compatible_with(self, other) -> None:
  291. if other is NaT:
  292. return
  293. self._require_matching_freq(other)
  294. # --------------------------------------------------------------------
  295. # Data / Attributes
  296. @cache_readonly
  297. def dtype(self) -> PeriodDtype:
  298. return self._dtype
  299. # error: Cannot override writeable attribute with read-only property
  300. @property # type: ignore[override]
  301. def freq(self) -> BaseOffset:
  302. """
  303. Return the frequency object for this PeriodArray.
  304. """
  305. return self.dtype.freq
  306. def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
  307. if dtype == "i8":
  308. return self.asi8
  309. elif dtype == bool:
  310. return ~self._isnan
  311. # This will raise TypeError for non-object dtypes
  312. return np.array(list(self), dtype=object)
  313. def __arrow_array__(self, type=None):
  314. """
  315. Convert myself into a pyarrow Array.
  316. """
  317. import pyarrow
  318. from pandas.core.arrays.arrow.extension_types import ArrowPeriodType
  319. if type is not None:
  320. if pyarrow.types.is_integer(type):
  321. return pyarrow.array(self._ndarray, mask=self.isna(), type=type)
  322. elif isinstance(type, ArrowPeriodType):
  323. # ensure we have the same freq
  324. if self.freqstr != type.freq:
  325. raise TypeError(
  326. "Not supported to convert PeriodArray to array with different "
  327. f"'freq' ({self.freqstr} vs {type.freq})"
  328. )
  329. else:
  330. raise TypeError(
  331. f"Not supported to convert PeriodArray to '{type}' type"
  332. )
  333. period_type = ArrowPeriodType(self.freqstr)
  334. storage_array = pyarrow.array(self._ndarray, mask=self.isna(), type="int64")
  335. return pyarrow.ExtensionArray.from_storage(period_type, storage_array)
  336. # --------------------------------------------------------------------
  337. # Vectorized analogues of Period properties
  338. year = _field_accessor(
  339. "year",
  340. """
  341. The year of the period.
  342. """,
  343. )
  344. month = _field_accessor(
  345. "month",
  346. """
  347. The month as January=1, December=12.
  348. """,
  349. )
  350. day = _field_accessor(
  351. "day",
  352. """
  353. The days of the period.
  354. """,
  355. )
  356. hour = _field_accessor(
  357. "hour",
  358. """
  359. The hour of the period.
  360. """,
  361. )
  362. minute = _field_accessor(
  363. "minute",
  364. """
  365. The minute of the period.
  366. """,
  367. )
  368. second = _field_accessor(
  369. "second",
  370. """
  371. The second of the period.
  372. """,
  373. )
  374. weekofyear = _field_accessor(
  375. "week",
  376. """
  377. The week ordinal of the year.
  378. """,
  379. )
  380. week = weekofyear
  381. day_of_week = _field_accessor(
  382. "day_of_week",
  383. """
  384. The day of the week with Monday=0, Sunday=6.
  385. """,
  386. )
  387. dayofweek = day_of_week
  388. weekday = dayofweek
  389. dayofyear = day_of_year = _field_accessor(
  390. "day_of_year",
  391. """
  392. The ordinal day of the year.
  393. """,
  394. )
  395. quarter = _field_accessor(
  396. "quarter",
  397. """
  398. The quarter of the date.
  399. """,
  400. )
  401. qyear = _field_accessor("qyear")
  402. days_in_month = _field_accessor(
  403. "days_in_month",
  404. """
  405. The number of days in the month.
  406. """,
  407. )
  408. daysinmonth = days_in_month
  409. @property
  410. def is_leap_year(self) -> np.ndarray:
  411. """
  412. Logical indicating if the date belongs to a leap year.
  413. """
  414. return isleapyear_arr(np.asarray(self.year))
  415. def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray:
  416. """
  417. Cast to DatetimeArray/Index.
  418. Parameters
  419. ----------
  420. freq : str or DateOffset, optional
  421. Target frequency. The default is 'D' for week or longer,
  422. 'S' otherwise.
  423. how : {'s', 'e', 'start', 'end'}
  424. Whether to use the start or end of the time period being converted.
  425. Returns
  426. -------
  427. DatetimeArray/Index
  428. """
  429. from pandas.core.arrays import DatetimeArray
  430. how = libperiod.validate_end_alias(how)
  431. end = how == "E"
  432. if end:
  433. if freq == "B" or self.freq == "B":
  434. # roll forward to ensure we land on B date
  435. adjust = Timedelta(1, "D") - Timedelta(1, "ns")
  436. return self.to_timestamp(how="start") + adjust
  437. else:
  438. adjust = Timedelta(1, "ns")
  439. return (self + self.freq).to_timestamp(how="start") - adjust
  440. if freq is None:
  441. freq = self._dtype._get_to_timestamp_base()
  442. base = freq
  443. else:
  444. freq = Period._maybe_convert_freq(freq)
  445. base = freq._period_dtype_code
  446. new_parr = self.asfreq(freq, how=how)
  447. new_data = libperiod.periodarr_to_dt64arr(new_parr.asi8, base)
  448. dta = DatetimeArray(new_data)
  449. if self.freq.name == "B":
  450. # See if we can retain BDay instead of Day in cases where
  451. # len(self) is too small for infer_freq to distinguish between them
  452. diffs = libalgos.unique_deltas(self.asi8)
  453. if len(diffs) == 1:
  454. diff = diffs[0]
  455. if diff == self.freq.n:
  456. dta._freq = self.freq
  457. elif diff == 1:
  458. dta._freq = self.freq.base
  459. # TODO: other cases?
  460. return dta
  461. else:
  462. return dta._with_freq("infer")
  463. # --------------------------------------------------------------------
  464. def _box_func(self, x) -> Period | NaTType:
  465. return Period._from_ordinal(ordinal=x, freq=self.freq)
  466. @doc(**_shared_doc_kwargs, other="PeriodIndex", other_name="PeriodIndex")
  467. def asfreq(self, freq=None, how: str = "E") -> PeriodArray:
  468. """
  469. Convert the {klass} to the specified frequency `freq`.
  470. Equivalent to applying :meth:`pandas.Period.asfreq` with the given arguments
  471. to each :class:`~pandas.Period` in this {klass}.
  472. Parameters
  473. ----------
  474. freq : str
  475. A frequency.
  476. how : str {{'E', 'S'}}, default 'E'
  477. Whether the elements should be aligned to the end
  478. or start within pa period.
  479. * 'E', 'END', or 'FINISH' for end,
  480. * 'S', 'START', or 'BEGIN' for start.
  481. January 31st ('END') vs. January 1st ('START') for example.
  482. Returns
  483. -------
  484. {klass}
  485. The transformed {klass} with the new frequency.
  486. See Also
  487. --------
  488. {other}.asfreq: Convert each Period in a {other_name} to the given frequency.
  489. Period.asfreq : Convert a :class:`~pandas.Period` object to the given frequency.
  490. Examples
  491. --------
  492. >>> pidx = pd.period_range('2010-01-01', '2015-01-01', freq='A')
  493. >>> pidx
  494. PeriodIndex(['2010', '2011', '2012', '2013', '2014', '2015'],
  495. dtype='period[A-DEC]')
  496. >>> pidx.asfreq('M')
  497. PeriodIndex(['2010-12', '2011-12', '2012-12', '2013-12', '2014-12',
  498. '2015-12'], dtype='period[M]')
  499. >>> pidx.asfreq('M', how='S')
  500. PeriodIndex(['2010-01', '2011-01', '2012-01', '2013-01', '2014-01',
  501. '2015-01'], dtype='period[M]')
  502. """
  503. how = libperiod.validate_end_alias(how)
  504. freq = Period._maybe_convert_freq(freq)
  505. base1 = self._dtype._dtype_code
  506. base2 = freq._period_dtype_code
  507. asi8 = self.asi8
  508. # self.freq.n can't be negative or 0
  509. end = how == "E"
  510. if end:
  511. ordinal = asi8 + self.freq.n - 1
  512. else:
  513. ordinal = asi8
  514. new_data = period_asfreq_arr(ordinal, base1, base2, end)
  515. if self._hasna:
  516. new_data[self._isnan] = iNaT
  517. return type(self)(new_data, freq=freq)
  518. # ------------------------------------------------------------------
  519. # Rendering Methods
  520. def _formatter(self, boxed: bool = False):
  521. if boxed:
  522. return str
  523. return "'{}'".format
  524. @dtl.ravel_compat
  525. def _format_native_types(
  526. self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
  527. ) -> npt.NDArray[np.object_]:
  528. """
  529. actually format my specific types
  530. """
  531. values = self.astype(object)
  532. # Create the formatter function
  533. if date_format:
  534. formatter = lambda per: per.strftime(date_format)
  535. else:
  536. # Uses `_Period.str` which in turn uses `format_period`
  537. formatter = lambda per: str(per)
  538. # Apply the formatter to all values in the array, possibly with a mask
  539. if self._hasna:
  540. mask = self._isnan
  541. values[mask] = na_rep
  542. imask = ~mask
  543. values[imask] = np.array([formatter(per) for per in values[imask]])
  544. else:
  545. values = np.array([formatter(per) for per in values])
  546. return values
  547. # ------------------------------------------------------------------
  548. def astype(self, dtype, copy: bool = True):
  549. # We handle Period[T] -> Period[U]
  550. # Our parent handles everything else.
  551. dtype = pandas_dtype(dtype)
  552. if is_dtype_equal(dtype, self._dtype):
  553. if not copy:
  554. return self
  555. else:
  556. return self.copy()
  557. if is_period_dtype(dtype):
  558. return self.asfreq(dtype.freq)
  559. if is_datetime64_any_dtype(dtype):
  560. # GH#45038 match PeriodIndex behavior.
  561. tz = getattr(dtype, "tz", None)
  562. return self.to_timestamp().tz_localize(tz)
  563. return super().astype(dtype, copy=copy)
  564. def searchsorted(
  565. self,
  566. value: NumpyValueArrayLike | ExtensionArray,
  567. side: Literal["left", "right"] = "left",
  568. sorter: NumpySorter = None,
  569. ) -> npt.NDArray[np.intp] | np.intp:
  570. npvalue = self._validate_setitem_value(value).view("M8[ns]")
  571. # Cast to M8 to get datetime-like NaT placement,
  572. # similar to dtl._period_dispatch
  573. m8arr = self._ndarray.view("M8[ns]")
  574. return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
  575. def fillna(self, value=None, method=None, limit=None) -> PeriodArray:
  576. if method is not None:
  577. # view as dt64 so we get treated as timelike in core.missing,
  578. # similar to dtl._period_dispatch
  579. dta = self.view("M8[ns]")
  580. result = dta.fillna(value=value, method=method, limit=limit)
  581. # error: Incompatible return value type (got "Union[ExtensionArray,
  582. # ndarray[Any, Any]]", expected "PeriodArray")
  583. return result.view(self.dtype) # type: ignore[return-value]
  584. return super().fillna(value=value, method=method, limit=limit)
  585. # ------------------------------------------------------------------
  586. # Arithmetic Methods
  587. def _addsub_int_array_or_scalar(
  588. self, other: np.ndarray | int, op: Callable[[Any, Any], Any]
  589. ) -> PeriodArray:
  590. """
  591. Add or subtract array of integers.
  592. Parameters
  593. ----------
  594. other : np.ndarray[int64] or int
  595. op : {operator.add, operator.sub}
  596. Returns
  597. -------
  598. result : PeriodArray
  599. """
  600. assert op in [operator.add, operator.sub]
  601. if op is operator.sub:
  602. other = -other
  603. res_values = algos.checked_add_with_arr(self.asi8, other, arr_mask=self._isnan)
  604. return type(self)(res_values, freq=self.freq)
  605. def _add_offset(self, other: BaseOffset):
  606. assert not isinstance(other, Tick)
  607. self._require_matching_freq(other, base=True)
  608. return self._addsub_int_array_or_scalar(other.n, operator.add)
  609. # TODO: can we de-duplicate with Period._add_timedeltalike_scalar?
  610. def _add_timedeltalike_scalar(self, other):
  611. """
  612. Parameters
  613. ----------
  614. other : timedelta, Tick, np.timedelta64
  615. Returns
  616. -------
  617. PeriodArray
  618. """
  619. if not isinstance(self.freq, Tick):
  620. # We cannot add timedelta-like to non-tick PeriodArray
  621. raise raise_on_incompatible(self, other)
  622. if isna(other):
  623. # i.e. np.timedelta64("NaT")
  624. return super()._add_timedeltalike_scalar(other)
  625. td = np.asarray(Timedelta(other).asm8)
  626. return self._add_timedelta_arraylike(td)
  627. def _add_timedelta_arraylike(
  628. self, other: TimedeltaArray | npt.NDArray[np.timedelta64]
  629. ) -> PeriodArray:
  630. """
  631. Parameters
  632. ----------
  633. other : TimedeltaArray or ndarray[timedelta64]
  634. Returns
  635. -------
  636. PeriodArray
  637. """
  638. freq = self.freq
  639. if not isinstance(freq, Tick):
  640. # We cannot add timedelta-like to non-tick PeriodArray
  641. raise TypeError(
  642. f"Cannot add or subtract timedelta64[ns] dtype from {self.dtype}"
  643. )
  644. dtype = np.dtype(f"m8[{freq._td64_unit}]")
  645. try:
  646. delta = astype_overflowsafe(
  647. np.asarray(other), dtype=dtype, copy=False, round_ok=False
  648. )
  649. except ValueError as err:
  650. # e.g. if we have minutes freq and try to add 30s
  651. # "Cannot losslessly convert units"
  652. raise IncompatibleFrequency(
  653. "Cannot add/subtract timedelta-like from PeriodArray that is "
  654. "not an integer multiple of the PeriodArray's freq."
  655. ) from err
  656. b_mask = np.isnat(delta)
  657. res_values = algos.checked_add_with_arr(
  658. self.asi8, delta.view("i8"), arr_mask=self._isnan, b_mask=b_mask
  659. )
  660. np.putmask(res_values, self._isnan | b_mask, iNaT)
  661. return type(self)(res_values, freq=self.freq)
  662. def _check_timedeltalike_freq_compat(self, other):
  663. """
  664. Arithmetic operations with timedelta-like scalars or array `other`
  665. are only valid if `other` is an integer multiple of `self.freq`.
  666. If the operation is valid, find that integer multiple. Otherwise,
  667. raise because the operation is invalid.
  668. Parameters
  669. ----------
  670. other : timedelta, np.timedelta64, Tick,
  671. ndarray[timedelta64], TimedeltaArray, TimedeltaIndex
  672. Returns
  673. -------
  674. multiple : int or ndarray[int64]
  675. Raises
  676. ------
  677. IncompatibleFrequency
  678. """
  679. assert isinstance(self.freq, Tick) # checked by calling function
  680. dtype = np.dtype(f"m8[{self.freq._td64_unit}]")
  681. if isinstance(other, (timedelta, np.timedelta64, Tick)):
  682. td = np.asarray(Timedelta(other).asm8)
  683. else:
  684. td = np.asarray(other)
  685. try:
  686. delta = astype_overflowsafe(td, dtype=dtype, copy=False, round_ok=False)
  687. except ValueError as err:
  688. raise raise_on_incompatible(self, other) from err
  689. delta = delta.view("i8")
  690. return lib.item_from_zerodim(delta)
  691. def raise_on_incompatible(left, right):
  692. """
  693. Helper function to render a consistent error message when raising
  694. IncompatibleFrequency.
  695. Parameters
  696. ----------
  697. left : PeriodArray
  698. right : None, DateOffset, Period, ndarray, or timedelta-like
  699. Returns
  700. -------
  701. IncompatibleFrequency
  702. Exception to be raised by the caller.
  703. """
  704. # GH#24283 error message format depends on whether right is scalar
  705. if isinstance(right, (np.ndarray, ABCTimedeltaArray)) or right is None:
  706. other_freq = None
  707. elif isinstance(right, (ABCPeriodIndex, PeriodArray, Period, BaseOffset)):
  708. other_freq = right.freqstr
  709. else:
  710. other_freq = delta_to_tick(Timedelta(right)).freqstr
  711. msg = DIFFERENT_FREQ.format(
  712. cls=type(left).__name__, own_freq=left.freqstr, other_freq=other_freq
  713. )
  714. return IncompatibleFrequency(msg)
  715. # -------------------------------------------------------------------
  716. # Constructor Helpers
  717. def period_array(
  718. data: Sequence[Period | str | None] | AnyArrayLike,
  719. freq: str | Tick | None = None,
  720. copy: bool = False,
  721. ) -> PeriodArray:
  722. """
  723. Construct a new PeriodArray from a sequence of Period scalars.
  724. Parameters
  725. ----------
  726. data : Sequence of Period objects
  727. A sequence of Period objects. These are required to all have
  728. the same ``freq.`` Missing values can be indicated by ``None``
  729. or ``pandas.NaT``.
  730. freq : str, Tick, or Offset
  731. The frequency of every element of the array. This can be specified
  732. to avoid inferring the `freq` from `data`.
  733. copy : bool, default False
  734. Whether to ensure a copy of the data is made.
  735. Returns
  736. -------
  737. PeriodArray
  738. See Also
  739. --------
  740. PeriodArray
  741. pandas.PeriodIndex
  742. Examples
  743. --------
  744. >>> period_array([pd.Period('2017', freq='A'),
  745. ... pd.Period('2018', freq='A')])
  746. <PeriodArray>
  747. ['2017', '2018']
  748. Length: 2, dtype: period[A-DEC]
  749. >>> period_array([pd.Period('2017', freq='A'),
  750. ... pd.Period('2018', freq='A'),
  751. ... pd.NaT])
  752. <PeriodArray>
  753. ['2017', '2018', 'NaT']
  754. Length: 3, dtype: period[A-DEC]
  755. Integers that look like years are handled
  756. >>> period_array([2000, 2001, 2002], freq='D')
  757. <PeriodArray>
  758. ['2000-01-01', '2001-01-01', '2002-01-01']
  759. Length: 3, dtype: period[D]
  760. Datetime-like strings may also be passed
  761. >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
  762. <PeriodArray>
  763. ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
  764. Length: 4, dtype: period[Q-DEC]
  765. """
  766. data_dtype = getattr(data, "dtype", None)
  767. if is_datetime64_dtype(data_dtype):
  768. return PeriodArray._from_datetime64(data, freq)
  769. if is_period_dtype(data_dtype):
  770. return PeriodArray(data, freq=freq)
  771. # other iterable of some kind
  772. if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
  773. data = list(data)
  774. arrdata = np.asarray(data)
  775. dtype: PeriodDtype | None
  776. if freq:
  777. dtype = PeriodDtype(freq)
  778. else:
  779. dtype = None
  780. if is_float_dtype(arrdata) and len(arrdata) > 0:
  781. raise TypeError("PeriodIndex does not allow floating point in construction")
  782. if is_integer_dtype(arrdata.dtype):
  783. arr = arrdata.astype(np.int64, copy=False)
  784. # error: Argument 2 to "from_ordinals" has incompatible type "Union[str,
  785. # Tick, None]"; expected "Union[timedelta, BaseOffset, str]"
  786. ordinals = libperiod.from_ordinals(arr, freq) # type: ignore[arg-type]
  787. return PeriodArray(ordinals, dtype=dtype)
  788. data = ensure_object(arrdata)
  789. return PeriodArray._from_sequence(data, dtype=dtype)
  790. @overload
  791. def validate_dtype_freq(dtype, freq: BaseOffsetT) -> BaseOffsetT:
  792. ...
  793. @overload
  794. def validate_dtype_freq(dtype, freq: timedelta | str | None) -> BaseOffset:
  795. ...
  796. def validate_dtype_freq(
  797. dtype, freq: BaseOffsetT | timedelta | str | None
  798. ) -> BaseOffsetT:
  799. """
  800. If both a dtype and a freq are available, ensure they match. If only
  801. dtype is available, extract the implied freq.
  802. Parameters
  803. ----------
  804. dtype : dtype
  805. freq : DateOffset or None
  806. Returns
  807. -------
  808. freq : DateOffset
  809. Raises
  810. ------
  811. ValueError : non-period dtype
  812. IncompatibleFrequency : mismatch between dtype and freq
  813. """
  814. if freq is not None:
  815. # error: Incompatible types in assignment (expression has type
  816. # "BaseOffset", variable has type "Union[BaseOffsetT, timedelta,
  817. # str, None]")
  818. freq = to_offset(freq) # type: ignore[assignment]
  819. if dtype is not None:
  820. dtype = pandas_dtype(dtype)
  821. if not is_period_dtype(dtype):
  822. raise ValueError("dtype must be PeriodDtype")
  823. if freq is None:
  824. freq = dtype.freq
  825. elif freq != dtype.freq:
  826. raise IncompatibleFrequency("specified freq and dtype are different")
  827. # error: Incompatible return value type (got "Union[BaseOffset, Any, None]",
  828. # expected "BaseOffset")
  829. return freq # type: ignore[return-value]
  830. def dt64arr_to_periodarr(
  831. data, freq, tz=None
  832. ) -> tuple[npt.NDArray[np.int64], BaseOffset]:
  833. """
  834. Convert an datetime-like array to values Period ordinals.
  835. Parameters
  836. ----------
  837. data : Union[Series[datetime64[ns]], DatetimeIndex, ndarray[datetime64ns]]
  838. freq : Optional[Union[str, Tick]]
  839. Must match the `freq` on the `data` if `data` is a DatetimeIndex
  840. or Series.
  841. tz : Optional[tzinfo]
  842. Returns
  843. -------
  844. ordinals : ndarray[int64]
  845. freq : Tick
  846. The frequency extracted from the Series or DatetimeIndex if that's
  847. used.
  848. """
  849. if not isinstance(data.dtype, np.dtype) or data.dtype.kind != "M":
  850. raise ValueError(f"Wrong dtype: {data.dtype}")
  851. if freq is None:
  852. if isinstance(data, ABCIndex):
  853. data, freq = data._values, data.freq
  854. elif isinstance(data, ABCSeries):
  855. data, freq = data._values, data.dt.freq
  856. elif isinstance(data, (ABCIndex, ABCSeries)):
  857. data = data._values
  858. reso = get_unit_from_dtype(data.dtype)
  859. freq = Period._maybe_convert_freq(freq)
  860. base = freq._period_dtype_code
  861. return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq
  862. def _get_ordinal_range(start, end, periods, freq, mult: int = 1):
  863. if com.count_not_none(start, end, periods) != 2:
  864. raise ValueError(
  865. "Of the three parameters: start, end, and periods, "
  866. "exactly two must be specified"
  867. )
  868. if freq is not None:
  869. freq = to_offset(freq)
  870. mult = freq.n
  871. if start is not None:
  872. start = Period(start, freq)
  873. if end is not None:
  874. end = Period(end, freq)
  875. is_start_per = isinstance(start, Period)
  876. is_end_per = isinstance(end, Period)
  877. if is_start_per and is_end_per and start.freq != end.freq:
  878. raise ValueError("start and end must have same freq")
  879. if start is NaT or end is NaT:
  880. raise ValueError("start and end must not be NaT")
  881. if freq is None:
  882. if is_start_per:
  883. freq = start.freq
  884. elif is_end_per:
  885. freq = end.freq
  886. else: # pragma: no cover
  887. raise ValueError("Could not infer freq from start/end")
  888. if periods is not None:
  889. periods = periods * mult
  890. if start is None:
  891. data = np.arange(
  892. end.ordinal - periods + mult, end.ordinal + 1, mult, dtype=np.int64
  893. )
  894. else:
  895. data = np.arange(
  896. start.ordinal, start.ordinal + periods, mult, dtype=np.int64
  897. )
  898. else:
  899. data = np.arange(start.ordinal, end.ordinal + 1, mult, dtype=np.int64)
  900. return data, freq
  901. def _range_from_fields(
  902. year=None,
  903. month=None,
  904. quarter=None,
  905. day=None,
  906. hour=None,
  907. minute=None,
  908. second=None,
  909. freq=None,
  910. ) -> tuple[np.ndarray, BaseOffset]:
  911. if hour is None:
  912. hour = 0
  913. if minute is None:
  914. minute = 0
  915. if second is None:
  916. second = 0
  917. if day is None:
  918. day = 1
  919. ordinals = []
  920. if quarter is not None:
  921. if freq is None:
  922. freq = to_offset("Q")
  923. base = FreqGroup.FR_QTR.value
  924. else:
  925. freq = to_offset(freq)
  926. base = libperiod.freq_to_dtype_code(freq)
  927. if base != FreqGroup.FR_QTR.value:
  928. raise AssertionError("base must equal FR_QTR")
  929. freqstr = freq.freqstr
  930. year, quarter = _make_field_arrays(year, quarter)
  931. for y, q in zip(year, quarter):
  932. y, m = parsing.quarter_to_myear(y, q, freqstr)
  933. val = libperiod.period_ordinal(y, m, 1, 1, 1, 1, 0, 0, base)
  934. ordinals.append(val)
  935. else:
  936. freq = to_offset(freq)
  937. base = libperiod.freq_to_dtype_code(freq)
  938. arrays = _make_field_arrays(year, month, day, hour, minute, second)
  939. for y, mth, d, h, mn, s in zip(*arrays):
  940. ordinals.append(libperiod.period_ordinal(y, mth, d, h, mn, s, 0, 0, base))
  941. return np.array(ordinals, dtype=np.int64), freq
  942. def _make_field_arrays(*fields) -> list[np.ndarray]:
  943. length = None
  944. for x in fields:
  945. if isinstance(x, (list, np.ndarray, ABCSeries)):
  946. if length is not None and len(x) != length:
  947. raise ValueError("Mismatched Period array lengths")
  948. if length is None:
  949. length = len(x)
  950. # error: Argument 2 to "repeat" has incompatible type "Optional[int]"; expected
  951. # "Union[Union[int, integer[Any]], Union[bool, bool_], ndarray, Sequence[Union[int,
  952. # integer[Any]]], Sequence[Union[bool, bool_]], Sequence[Sequence[Any]]]"
  953. return [
  954. np.asarray(x)
  955. if isinstance(x, (np.ndarray, list, ABCSeries))
  956. else np.repeat(x, length) # type: ignore[arg-type]
  957. for x in fields
  958. ]