# rolling.py (scraper artifact removed: file-size banner and concatenated line-number gutter)
"""
Provide a generic structure to support window functions,
similar to how we have a Groupby object.
"""
  5. from __future__ import annotations
  6. import copy
  7. from datetime import timedelta
  8. from functools import partial
  9. import inspect
  10. from textwrap import dedent
  11. from typing import (
  12. TYPE_CHECKING,
  13. Any,
  14. Callable,
  15. Hashable,
  16. Iterator,
  17. Sized,
  18. cast,
  19. )
  20. import numpy as np
  21. from pandas._libs.tslibs import (
  22. BaseOffset,
  23. to_offset,
  24. )
  25. import pandas._libs.window.aggregations as window_aggregations
  26. from pandas._typing import (
  27. ArrayLike,
  28. Axis,
  29. NDFrameT,
  30. QuantileInterpolation,
  31. WindowingRankType,
  32. )
  33. from pandas.compat._optional import import_optional_dependency
  34. from pandas.errors import DataError
  35. from pandas.util._decorators import doc
  36. from pandas.core.dtypes.common import (
  37. ensure_float64,
  38. is_bool,
  39. is_integer,
  40. is_list_like,
  41. is_numeric_dtype,
  42. is_scalar,
  43. needs_i8_conversion,
  44. )
  45. from pandas.core.dtypes.generic import (
  46. ABCDataFrame,
  47. ABCSeries,
  48. )
  49. from pandas.core.dtypes.missing import notna
  50. from pandas.core._numba import executor
  51. from pandas.core.algorithms import factorize
  52. from pandas.core.apply import ResamplerWindowApply
  53. from pandas.core.arrays import ExtensionArray
  54. from pandas.core.base import SelectionMixin
  55. import pandas.core.common as com
  56. from pandas.core.indexers.objects import (
  57. BaseIndexer,
  58. FixedWindowIndexer,
  59. GroupbyIndexer,
  60. VariableWindowIndexer,
  61. )
  62. from pandas.core.indexes.api import (
  63. DatetimeIndex,
  64. Index,
  65. MultiIndex,
  66. PeriodIndex,
  67. TimedeltaIndex,
  68. )
  69. from pandas.core.reshape.concat import concat
  70. from pandas.core.util.numba_ import (
  71. get_jit_arguments,
  72. maybe_use_numba,
  73. )
  74. from pandas.core.window.common import (
  75. flex_binary_moment,
  76. zsqrt,
  77. )
  78. from pandas.core.window.doc import (
  79. _shared_docs,
  80. create_section_header,
  81. kwargs_numeric_only,
  82. kwargs_scipy,
  83. numba_notes,
  84. template_header,
  85. template_returns,
  86. template_see_also,
  87. window_agg_numba_parameters,
  88. window_apply_parameters,
  89. )
  90. from pandas.core.window.numba_ import (
  91. generate_manual_numpy_nan_agg_with_axis,
  92. generate_numba_apply_func,
  93. generate_numba_table_func,
  94. )
  95. if TYPE_CHECKING:
  96. from pandas import (
  97. DataFrame,
  98. Series,
  99. )
  100. from pandas.core.generic import NDFrame
  101. from pandas.core.groupby.ops import BaseGrouper
  102. class BaseWindow(SelectionMixin):
  103. """Provides utilities for performing windowing operations."""
  104. _attributes: list[str] = []
  105. exclusions: frozenset[Hashable] = frozenset()
  106. _on: Index
  107. def __init__(
  108. self,
  109. obj: NDFrame,
  110. window=None,
  111. min_periods: int | None = None,
  112. center: bool | None = False,
  113. win_type: str | None = None,
  114. axis: Axis = 0,
  115. on: str | Index | None = None,
  116. closed: str | None = None,
  117. step: int | None = None,
  118. method: str = "single",
  119. *,
  120. selection=None,
  121. ) -> None:
  122. self.obj = obj
  123. self.on = on
  124. self.closed = closed
  125. self.step = step
  126. self.window = window
  127. self.min_periods = min_periods
  128. self.center = center
  129. self.win_type = win_type
  130. self.axis = obj._get_axis_number(axis) if axis is not None else None
  131. self.method = method
  132. self._win_freq_i8: int | None = None
  133. if self.on is None:
  134. if self.axis == 0:
  135. self._on = self.obj.index
  136. else:
  137. # i.e. self.axis == 1
  138. self._on = self.obj.columns
  139. elif isinstance(self.on, Index):
  140. self._on = self.on
  141. elif isinstance(self.obj, ABCDataFrame) and self.on in self.obj.columns:
  142. self._on = Index(self.obj[self.on])
  143. else:
  144. raise ValueError(
  145. f"invalid on specified as {self.on}, "
  146. "must be a column (of DataFrame), an Index or None"
  147. )
  148. self._selection = selection
  149. self._validate()
  150. def _validate(self) -> None:
  151. if self.center is not None and not is_bool(self.center):
  152. raise ValueError("center must be a boolean")
  153. if self.min_periods is not None:
  154. if not is_integer(self.min_periods):
  155. raise ValueError("min_periods must be an integer")
  156. if self.min_periods < 0:
  157. raise ValueError("min_periods must be >= 0")
  158. if is_integer(self.window) and self.min_periods > self.window:
  159. raise ValueError(
  160. f"min_periods {self.min_periods} must be <= window {self.window}"
  161. )
  162. if self.closed is not None and self.closed not in [
  163. "right",
  164. "both",
  165. "left",
  166. "neither",
  167. ]:
  168. raise ValueError("closed must be 'right', 'left', 'both' or 'neither'")
  169. if not isinstance(self.obj, (ABCSeries, ABCDataFrame)):
  170. raise TypeError(f"invalid type: {type(self)}")
  171. if isinstance(self.window, BaseIndexer):
  172. # Validate that the passed BaseIndexer subclass has
  173. # a get_window_bounds with the correct signature.
  174. get_window_bounds_signature = inspect.signature(
  175. self.window.get_window_bounds
  176. ).parameters.keys()
  177. expected_signature = inspect.signature(
  178. BaseIndexer().get_window_bounds
  179. ).parameters.keys()
  180. if get_window_bounds_signature != expected_signature:
  181. raise ValueError(
  182. f"{type(self.window).__name__} does not implement "
  183. f"the correct signature for get_window_bounds"
  184. )
  185. if self.method not in ["table", "single"]:
  186. raise ValueError("method must be 'table' or 'single")
  187. if self.step is not None:
  188. if not is_integer(self.step):
  189. raise ValueError("step must be an integer")
  190. if self.step < 0:
  191. raise ValueError("step must be >= 0")
  192. def _check_window_bounds(
  193. self, start: np.ndarray, end: np.ndarray, num_vals: int
  194. ) -> None:
  195. if len(start) != len(end):
  196. raise ValueError(
  197. f"start ({len(start)}) and end ({len(end)}) bounds must be the "
  198. f"same length"
  199. )
  200. if len(start) != (num_vals + (self.step or 1) - 1) // (self.step or 1):
  201. raise ValueError(
  202. f"start and end bounds ({len(start)}) must be the same length "
  203. f"as the object ({num_vals}) divided by the step ({self.step}) "
  204. f"if given and rounded up"
  205. )
  206. def _slice_axis_for_step(self, index: Index, result: Sized | None = None) -> Index:
  207. """
  208. Slices the index for a given result and the preset step.
  209. """
  210. return (
  211. index
  212. if result is None or len(result) == len(index)
  213. else index[:: self.step]
  214. )
  215. def _validate_numeric_only(self, name: str, numeric_only: bool) -> None:
  216. """
  217. Validate numeric_only argument, raising if invalid for the input.
  218. Parameters
  219. ----------
  220. name : str
  221. Name of the operator (kernel).
  222. numeric_only : bool
  223. Value passed by user.
  224. """
  225. if (
  226. self._selected_obj.ndim == 1
  227. and numeric_only
  228. and not is_numeric_dtype(self._selected_obj.dtype)
  229. ):
  230. raise NotImplementedError(
  231. f"{type(self).__name__}.{name} does not implement numeric_only"
  232. )
  233. def _make_numeric_only(self, obj: NDFrameT) -> NDFrameT:
  234. """Subset DataFrame to numeric columns.
  235. Parameters
  236. ----------
  237. obj : DataFrame
  238. Returns
  239. -------
  240. obj subset to numeric-only columns.
  241. """
  242. result = obj.select_dtypes(include=["number"], exclude=["timedelta"])
  243. return result
  244. def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
  245. """
  246. Split data into blocks & return conformed data.
  247. """
  248. # filter out the on from the object
  249. if self.on is not None and not isinstance(self.on, Index) and obj.ndim == 2:
  250. obj = obj.reindex(columns=obj.columns.difference([self.on]), copy=False)
  251. if obj.ndim > 1 and (numeric_only or self.axis == 1):
  252. # GH: 20649 in case of mixed dtype and axis=1 we have to convert everything
  253. # to float to calculate the complete row at once. We exclude all non-numeric
  254. # dtypes.
  255. obj = self._make_numeric_only(obj)
  256. if self.axis == 1:
  257. obj = obj.astype("float64", copy=False)
  258. obj._mgr = obj._mgr.consolidate()
  259. return obj
  260. def _gotitem(self, key, ndim, subset=None):
  261. """
  262. Sub-classes to define. Return a sliced object.
  263. Parameters
  264. ----------
  265. key : str / list of selections
  266. ndim : {1, 2}
  267. requested ndim of result
  268. subset : object, default None
  269. subset to act on
  270. """
  271. # create a new object to prevent aliasing
  272. if subset is None:
  273. subset = self.obj
  274. # we need to make a shallow copy of ourselves
  275. # with the same groupby
  276. kwargs = {attr: getattr(self, attr) for attr in self._attributes}
  277. selection = None
  278. if subset.ndim == 2 and (
  279. (is_scalar(key) and key in subset) or is_list_like(key)
  280. ):
  281. selection = key
  282. elif subset.ndim == 1 and is_scalar(key) and key == subset.name:
  283. selection = key
  284. new_win = type(self)(subset, selection=selection, **kwargs)
  285. return new_win
  286. def __getattr__(self, attr: str):
  287. if attr in self._internal_names_set:
  288. return object.__getattribute__(self, attr)
  289. if attr in self.obj:
  290. return self[attr]
  291. raise AttributeError(
  292. f"'{type(self).__name__}' object has no attribute '{attr}'"
  293. )
  294. def _dir_additions(self):
  295. return self.obj._dir_additions()
  296. def __repr__(self) -> str:
  297. """
  298. Provide a nice str repr of our rolling object.
  299. """
  300. attrs_list = (
  301. f"{attr_name}={getattr(self, attr_name)}"
  302. for attr_name in self._attributes
  303. if getattr(self, attr_name, None) is not None and attr_name[0] != "_"
  304. )
  305. attrs = ",".join(attrs_list)
  306. return f"{type(self).__name__} [{attrs}]"
  307. def __iter__(self) -> Iterator:
  308. obj = self._selected_obj.set_axis(self._on)
  309. obj = self._create_data(obj)
  310. indexer = self._get_window_indexer()
  311. start, end = indexer.get_window_bounds(
  312. num_values=len(obj),
  313. min_periods=self.min_periods,
  314. center=self.center,
  315. closed=self.closed,
  316. step=self.step,
  317. )
  318. self._check_window_bounds(start, end, len(obj))
  319. for s, e in zip(start, end):
  320. result = obj.iloc[slice(s, e)]
  321. yield result
  322. def _prep_values(self, values: ArrayLike) -> np.ndarray:
  323. """Convert input to numpy arrays for Cython routines"""
  324. if needs_i8_conversion(values.dtype):
  325. raise NotImplementedError(
  326. f"ops for {type(self).__name__} for this "
  327. f"dtype {values.dtype} are not implemented"
  328. )
  329. # GH #12373 : rolling functions error on float32 data
  330. # make sure the data is coerced to float64
  331. try:
  332. if isinstance(values, ExtensionArray):
  333. values = values.to_numpy(np.float64, na_value=np.nan)
  334. else:
  335. values = ensure_float64(values)
  336. except (ValueError, TypeError) as err:
  337. raise TypeError(f"cannot handle this type -> {values.dtype}") from err
  338. # Convert inf to nan for C funcs
  339. inf = np.isinf(values)
  340. if inf.any():
  341. values = np.where(inf, np.nan, values)
  342. return values
  343. def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None:
  344. # if we have an 'on' column we want to put it back into
  345. # the results in the same location
  346. from pandas import Series
  347. if self.on is not None and not self._on.equals(obj.index):
  348. name = self._on.name
  349. extra_col = Series(self._on, index=self.obj.index, name=name, copy=False)
  350. if name in result.columns:
  351. # TODO: sure we want to overwrite results?
  352. result[name] = extra_col
  353. elif name in result.index.names:
  354. pass
  355. elif name in self._selected_obj.columns:
  356. # insert in the same location as we had in _selected_obj
  357. old_cols = self._selected_obj.columns
  358. new_cols = result.columns
  359. old_loc = old_cols.get_loc(name)
  360. overlap = new_cols.intersection(old_cols[:old_loc])
  361. new_loc = len(overlap)
  362. result.insert(new_loc, name, extra_col)
  363. else:
  364. # insert at the end
  365. result[name] = extra_col
  366. @property
  367. def _index_array(self):
  368. # TODO: why do we get here with e.g. MultiIndex?
  369. if needs_i8_conversion(self._on.dtype):
  370. idx = cast("PeriodIndex | DatetimeIndex | TimedeltaIndex", self._on)
  371. return idx.asi8
  372. return None
  373. def _resolve_output(self, out: DataFrame, obj: DataFrame) -> DataFrame:
  374. """Validate and finalize result."""
  375. if out.shape[1] == 0 and obj.shape[1] > 0:
  376. raise DataError("No numeric types to aggregate")
  377. if out.shape[1] == 0:
  378. return obj.astype("float64")
  379. self._insert_on_column(out, obj)
  380. return out
  381. def _get_window_indexer(self) -> BaseIndexer:
  382. """
  383. Return an indexer class that will compute the window start and end bounds
  384. """
  385. if isinstance(self.window, BaseIndexer):
  386. return self.window
  387. if self._win_freq_i8 is not None:
  388. return VariableWindowIndexer(
  389. index_array=self._index_array,
  390. window_size=self._win_freq_i8,
  391. center=self.center,
  392. )
  393. return FixedWindowIndexer(window_size=self.window)
  394. def _apply_series(
  395. self, homogeneous_func: Callable[..., ArrayLike], name: str | None = None
  396. ) -> Series:
  397. """
  398. Series version of _apply_blockwise
  399. """
  400. obj = self._create_data(self._selected_obj)
  401. if name == "count":
  402. # GH 12541: Special case for count where we support date-like types
  403. obj = notna(obj).astype(int)
  404. try:
  405. values = self._prep_values(obj._values)
  406. except (TypeError, NotImplementedError) as err:
  407. raise DataError("No numeric types to aggregate") from err
  408. result = homogeneous_func(values)
  409. index = self._slice_axis_for_step(obj.index, result)
  410. return obj._constructor(result, index=index, name=obj.name)
  411. def _apply_blockwise(
  412. self,
  413. homogeneous_func: Callable[..., ArrayLike],
  414. name: str,
  415. numeric_only: bool = False,
  416. ) -> DataFrame | Series:
  417. """
  418. Apply the given function to the DataFrame broken down into homogeneous
  419. sub-frames.
  420. """
  421. self._validate_numeric_only(name, numeric_only)
  422. if self._selected_obj.ndim == 1:
  423. return self._apply_series(homogeneous_func, name)
  424. obj = self._create_data(self._selected_obj, numeric_only)
  425. if name == "count":
  426. # GH 12541: Special case for count where we support date-like types
  427. obj = notna(obj).astype(int)
  428. obj._mgr = obj._mgr.consolidate()
  429. if self.axis == 1:
  430. obj = obj.T
  431. taker = []
  432. res_values = []
  433. for i, arr in enumerate(obj._iter_column_arrays()):
  434. # GH#42736 operate column-wise instead of block-wise
  435. # As of 2.0, hfunc will raise for nuisance columns
  436. try:
  437. arr = self._prep_values(arr)
  438. except (TypeError, NotImplementedError) as err:
  439. raise DataError(
  440. f"Cannot aggregate non-numeric type: {arr.dtype}"
  441. ) from err
  442. res = homogeneous_func(arr)
  443. res_values.append(res)
  444. taker.append(i)
  445. index = self._slice_axis_for_step(
  446. obj.index, res_values[0] if len(res_values) > 0 else None
  447. )
  448. df = type(obj)._from_arrays(
  449. res_values,
  450. index=index,
  451. columns=obj.columns.take(taker),
  452. verify_integrity=False,
  453. )
  454. if self.axis == 1:
  455. df = df.T
  456. return self._resolve_output(df, obj)
  457. def _apply_tablewise(
  458. self,
  459. homogeneous_func: Callable[..., ArrayLike],
  460. name: str | None = None,
  461. numeric_only: bool = False,
  462. ) -> DataFrame | Series:
  463. """
  464. Apply the given function to the DataFrame across the entire object
  465. """
  466. if self._selected_obj.ndim == 1:
  467. raise ValueError("method='table' not applicable for Series objects.")
  468. obj = self._create_data(self._selected_obj, numeric_only)
  469. values = self._prep_values(obj.to_numpy())
  470. values = values.T if self.axis == 1 else values
  471. result = homogeneous_func(values)
  472. result = result.T if self.axis == 1 else result
  473. index = self._slice_axis_for_step(obj.index, result)
  474. columns = (
  475. obj.columns
  476. if result.shape[1] == len(obj.columns)
  477. else obj.columns[:: self.step]
  478. )
  479. out = obj._constructor(result, index=index, columns=columns)
  480. return self._resolve_output(out, obj)
  481. def _apply_pairwise(
  482. self,
  483. target: DataFrame | Series,
  484. other: DataFrame | Series | None,
  485. pairwise: bool | None,
  486. func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
  487. numeric_only: bool,
  488. ) -> DataFrame | Series:
  489. """
  490. Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
  491. """
  492. target = self._create_data(target, numeric_only)
  493. if other is None:
  494. other = target
  495. # only default unset
  496. pairwise = True if pairwise is None else pairwise
  497. elif not isinstance(other, (ABCDataFrame, ABCSeries)):
  498. raise ValueError("other must be a DataFrame or Series")
  499. elif other.ndim == 2 and numeric_only:
  500. other = self._make_numeric_only(other)
  501. return flex_binary_moment(target, other, func, pairwise=bool(pairwise))
  502. def _apply(
  503. self,
  504. func: Callable[..., Any],
  505. name: str,
  506. numeric_only: bool = False,
  507. numba_args: tuple[Any, ...] = (),
  508. **kwargs,
  509. ):
  510. """
  511. Rolling statistical measure using supplied function.
  512. Designed to be used with passed-in Cython array-based functions.
  513. Parameters
  514. ----------
  515. func : callable function to apply
  516. name : str,
  517. numba_args : tuple
  518. args to be passed when func is a numba func
  519. **kwargs
  520. additional arguments for rolling function and window function
  521. Returns
  522. -------
  523. y : type of input
  524. """
  525. window_indexer = self._get_window_indexer()
  526. min_periods = (
  527. self.min_periods
  528. if self.min_periods is not None
  529. else window_indexer.window_size
  530. )
  531. def homogeneous_func(values: np.ndarray):
  532. # calculation function
  533. if values.size == 0:
  534. return values.copy()
  535. def calc(x):
  536. start, end = window_indexer.get_window_bounds(
  537. num_values=len(x),
  538. min_periods=min_periods,
  539. center=self.center,
  540. closed=self.closed,
  541. step=self.step,
  542. )
  543. self._check_window_bounds(start, end, len(x))
  544. return func(x, start, end, min_periods, *numba_args)
  545. with np.errstate(all="ignore"):
  546. result = calc(values)
  547. return result
  548. if self.method == "single":
  549. return self._apply_blockwise(homogeneous_func, name, numeric_only)
  550. else:
  551. return self._apply_tablewise(homogeneous_func, name, numeric_only)
  552. def _numba_apply(
  553. self,
  554. func: Callable[..., Any],
  555. engine_kwargs: dict[str, bool] | None = None,
  556. *func_args,
  557. ):
  558. window_indexer = self._get_window_indexer()
  559. min_periods = (
  560. self.min_periods
  561. if self.min_periods is not None
  562. else window_indexer.window_size
  563. )
  564. obj = self._create_data(self._selected_obj)
  565. if self.axis == 1:
  566. obj = obj.T
  567. values = self._prep_values(obj.to_numpy())
  568. if values.ndim == 1:
  569. values = values.reshape(-1, 1)
  570. start, end = window_indexer.get_window_bounds(
  571. num_values=len(values),
  572. min_periods=min_periods,
  573. center=self.center,
  574. closed=self.closed,
  575. step=self.step,
  576. )
  577. self._check_window_bounds(start, end, len(values))
  578. aggregator = executor.generate_shared_aggregator(
  579. func, **get_jit_arguments(engine_kwargs)
  580. )
  581. result = aggregator(values, start, end, min_periods, *func_args)
  582. result = result.T if self.axis == 1 else result
  583. index = self._slice_axis_for_step(obj.index, result)
  584. if obj.ndim == 1:
  585. result = result.squeeze()
  586. out = obj._constructor(result, index=index, name=obj.name)
  587. return out
  588. else:
  589. columns = self._slice_axis_for_step(obj.columns, result.T)
  590. out = obj._constructor(result, index=index, columns=columns)
  591. return self._resolve_output(out, obj)
  592. def aggregate(self, func, *args, **kwargs):
  593. result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
  594. if result is None:
  595. return self.apply(func, raw=False, args=args, kwargs=kwargs)
  596. return result
  597. agg = aggregate
class BaseWindowGroupby(BaseWindow):
    """
    Provide the groupby windowing facilities.
    """

    # Grouper describing the group layout; validated and set in __init__.
    _grouper: BaseGrouper
    # NOTE(review): presumably mirrors groupby(as_index=...) — confirm against caller.
    _as_index: bool
    # _attributes drives the shallow-copy kwargs in BaseWindow._gotitem and
    # the attrs shown by BaseWindow.__repr__.
    _attributes: list[str] = ["_grouper"]
    def __init__(
        self,
        obj: DataFrame | Series,
        *args,
        _grouper: BaseGrouper,
        _as_index: bool = True,
        **kwargs,
    ) -> None:
        """
        Construct a groupby-windowing object.

        Parameters
        ----------
        obj : DataFrame or Series
            Object to window over, before group columns are dropped.
        _grouper : BaseGrouper
            Grouper describing the group layout.
        _as_index : bool, default True
            Stored on the instance; not otherwise used here.

        Raises
        ------
        ValueError
            If ``_grouper`` is not a BaseGrouper.
        NotImplementedError
            If a ``step`` keyword is supplied (unsupported for groupby).
        """
        # Local import: BaseGrouper is only imported at module level under
        # TYPE_CHECKING, so the runtime class is fetched here.
        from pandas.core.groupby.ops import BaseGrouper

        if not isinstance(_grouper, BaseGrouper):
            raise ValueError("Must pass a BaseGrouper object.")
        self._grouper = _grouper
        self._as_index = _as_index
        # GH 32262: It's convention to keep the grouping column in
        # groupby.<agg_func>, but unexpected to users in
        # groupby.rolling.<agg_func>
        obj = obj.drop(columns=self._grouper.names, errors="ignore")
        # GH 15354
        if kwargs.get("step") is not None:
            raise NotImplementedError("step not implemented for groupby")
        super().__init__(obj, *args, **kwargs)
  626. def _apply(
  627. self,
  628. func: Callable[..., Any],
  629. name: str,
  630. numeric_only: bool = False,
  631. numba_args: tuple[Any, ...] = (),
  632. **kwargs,
  633. ) -> DataFrame | Series:
  634. result = super()._apply(
  635. func,
  636. name,
  637. numeric_only,
  638. numba_args,
  639. **kwargs,
  640. )
  641. # Reconstruct the resulting MultiIndex
  642. # 1st set of levels = group by labels
  643. # 2nd set of levels = original DataFrame/Series index
  644. grouped_object_index = self.obj.index
  645. grouped_index_name = [*grouped_object_index.names]
  646. groupby_keys = copy.copy(self._grouper.names)
  647. result_index_names = groupby_keys + grouped_index_name
  648. drop_columns = [
  649. key
  650. for key in self._grouper.names
  651. if key not in self.obj.index.names or key is None
  652. ]
  653. if len(drop_columns) != len(groupby_keys):
  654. # Our result will have still kept the column in the result
  655. result = result.drop(columns=drop_columns, errors="ignore")
  656. codes = self._grouper.codes
  657. levels = copy.copy(self._grouper.levels)
  658. group_indices = self._grouper.indices.values()
  659. if group_indices:
  660. indexer = np.concatenate(list(group_indices))
  661. else:
  662. indexer = np.array([], dtype=np.intp)
  663. codes = [c.take(indexer) for c in codes]
  664. # if the index of the original dataframe needs to be preserved, append
  665. # this index (but reordered) to the codes/levels from the groupby
  666. if grouped_object_index is not None:
  667. idx = grouped_object_index.take(indexer)
  668. if not isinstance(idx, MultiIndex):
  669. idx = MultiIndex.from_arrays([idx])
  670. codes.extend(list(idx.codes))
  671. levels.extend(list(idx.levels))
  672. result_index = MultiIndex(
  673. levels, codes, names=result_index_names, verify_integrity=False
  674. )
  675. result.index = result_index
  676. if not self._as_index:
  677. result = result.reset_index(level=list(range(len(groupby_keys))))
  678. return result
  679. def _apply_pairwise(
  680. self,
  681. target: DataFrame | Series,
  682. other: DataFrame | Series | None,
  683. pairwise: bool | None,
  684. func: Callable[[DataFrame | Series, DataFrame | Series], DataFrame | Series],
  685. numeric_only: bool,
  686. ) -> DataFrame | Series:
  687. """
  688. Apply the given pairwise function given 2 pandas objects (DataFrame/Series)
  689. """
  690. # Manually drop the grouping column first
  691. target = target.drop(columns=self._grouper.names, errors="ignore")
  692. result = super()._apply_pairwise(target, other, pairwise, func, numeric_only)
  693. # 1) Determine the levels + codes of the groupby levels
  694. if other is not None and not all(
  695. len(group) == len(other) for group in self._grouper.indices.values()
  696. ):
  697. # GH 42915
  698. # len(other) != len(any group), so must reindex (expand) the result
  699. # from flex_binary_moment to a "transform"-like result
  700. # per groupby combination
  701. old_result_len = len(result)
  702. result = concat(
  703. [
  704. result.take(gb_indices).reindex(result.index)
  705. for gb_indices in self._grouper.indices.values()
  706. ]
  707. )
  708. gb_pairs = (
  709. com.maybe_make_list(pair) for pair in self._grouper.indices.keys()
  710. )
  711. groupby_codes = []
  712. groupby_levels = []
  713. # e.g. [[1, 2], [4, 5]] as [[1, 4], [2, 5]]
  714. for gb_level_pair in map(list, zip(*gb_pairs)):
  715. labels = np.repeat(np.array(gb_level_pair), old_result_len)
  716. codes, levels = factorize(labels)
  717. groupby_codes.append(codes)
  718. groupby_levels.append(levels)
  719. else:
  720. # pairwise=True or len(other) == len(each group), so repeat
  721. # the groupby labels by the number of columns in the original object
  722. groupby_codes = self._grouper.codes
  723. # error: Incompatible types in assignment (expression has type
  724. # "List[Index]", variable has type "List[Union[ndarray, Index]]")
  725. groupby_levels = self._grouper.levels # type: ignore[assignment]
  726. group_indices = self._grouper.indices.values()
  727. if group_indices:
  728. indexer = np.concatenate(list(group_indices))
  729. else:
  730. indexer = np.array([], dtype=np.intp)
  731. if target.ndim == 1:
  732. repeat_by = 1
  733. else:
  734. repeat_by = len(target.columns)
  735. groupby_codes = [
  736. np.repeat(c.take(indexer), repeat_by) for c in groupby_codes
  737. ]
  738. # 2) Determine the levels + codes of the result from super()._apply_pairwise
  739. if isinstance(result.index, MultiIndex):
  740. result_codes = list(result.index.codes)
  741. result_levels = list(result.index.levels)
  742. result_names = list(result.index.names)
  743. else:
  744. idx_codes, idx_levels = factorize(result.index)
  745. result_codes = [idx_codes]
  746. result_levels = [idx_levels]
  747. result_names = [result.index.name]
  748. # 3) Create the resulting index by combining 1) + 2)
  749. result_codes = groupby_codes + result_codes
  750. result_levels = groupby_levels + result_levels
  751. result_names = self._grouper.names + result_names
  752. result_index = MultiIndex(
  753. result_levels, result_codes, names=result_names, verify_integrity=False
  754. )
  755. result.index = result_index
  756. return result
  757. def _create_data(self, obj: NDFrameT, numeric_only: bool = False) -> NDFrameT:
  758. """
  759. Split data into blocks & return conformed data.
  760. """
  761. # Ensure the object we're rolling over is monotonically sorted relative
  762. # to the groups
  763. # GH 36197
  764. if not obj.empty:
  765. groupby_order = np.concatenate(list(self._grouper.indices.values())).astype(
  766. np.int64
  767. )
  768. obj = obj.take(groupby_order)
  769. return super()._create_data(obj, numeric_only)
  770. def _gotitem(self, key, ndim, subset=None):
  771. # we are setting the index on the actual object
  772. # here so our index is carried through to the selected obj
  773. # when we do the splitting for the groupby
  774. if self.on is not None:
  775. # GH 43355
  776. subset = self.obj.set_index(self._on)
  777. return super()._gotitem(key, ndim, subset=subset)
class Window(BaseWindow):
    """
    Provide rolling window calculations.

    Parameters
    ----------
    window : int, timedelta, str, offset, or BaseIndexer subclass
        Size of the moving window.

        If an integer, the fixed number of observations used for
        each window.

        If a timedelta, str, or offset, the time period of each window. Each
        window will be a variable sized based on the observations included in
        the time-period. This is only valid for datetimelike indexes.
        To learn more about the offsets & frequency strings, please see `this link
        <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

        If a BaseIndexer subclass, the window boundaries
        based on the defined ``get_window_bounds`` method. Additional rolling
        keyword arguments, namely ``min_periods``, ``center``, ``closed`` and
        ``step`` will be passed to ``get_window_bounds``.

    min_periods : int, default None
        Minimum number of observations in window required to have a value;
        otherwise, result is ``np.nan``.

        For a window that is specified by an offset, ``min_periods`` will default to 1.

        For a window that is specified by an integer, ``min_periods`` will default
        to the size of the window.

    center : bool, default False
        If False, set the window labels as the right edge of the window index.

        If True, set the window labels as the center of the window index.

    win_type : str, default None
        If ``None``, all points are evenly weighted.

        If a string, it must be a valid `scipy.signal window function
        <https://docs.scipy.org/doc/scipy/reference/signal.windows.html#module-scipy.signal.windows>`__.

        Certain Scipy window types require additional parameters to be passed
        in the aggregation function. The additional parameters must match
        the keywords specified in the Scipy window type method signature.

    on : str, optional
        For a DataFrame, a column label or Index level on which
        to calculate the rolling window, rather than the DataFrame's index.

        Provided integer column is ignored and excluded from result since
        an integer index is not used to calculate the rolling window.

    axis : int or str, default 0
        If ``0`` or ``'index'``, roll across the rows.

        If ``1`` or ``'columns'``, roll across the columns.

        For `Series` this parameter is unused and defaults to 0.

    closed : str, default None
        If ``'right'``, the first point in the window is excluded from calculations.

        If ``'left'``, the last point in the window is excluded from calculations.

        If ``'both'``, the no points in the window are excluded from calculations.

        If ``'neither'``, the first and last points in the window are excluded
        from calculations.

        Default ``None`` (``'right'``).

        .. versionchanged:: 1.2.0

            The closed parameter with fixed windows is now supported.

    step : int, default None

        .. versionadded:: 1.5.0

        Evaluate the window at every ``step`` result, equivalent to slicing as
        ``[::step]``. ``window`` must be an integer. Using a step argument other
        than None or 1 will produce a result with a different shape than the input.

    method : str {'single', 'table'}, default 'single'

        .. versionadded:: 1.3.0

        Execute the rolling operation per single column or row (``'single'``)
        or over the entire object (``'table'``).

        This argument is only implemented when specifying ``engine='numba'``
        in the method call.

    Returns
    -------
    ``Window`` subclass if a ``win_type`` is passed

    ``Rolling`` subclass if ``win_type`` is not passed

    See Also
    --------
    expanding : Provides expanding transformations.
    ewm : Provides exponential weighted functions.

    Notes
    -----
    See :ref:`Windowing Operations <window.generic>` for further usage details
    and examples.

    Examples
    --------
    >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]})
    >>> df
         B
    0  0.0
    1  1.0
    2  2.0
    3  NaN
    4  4.0

    **window**

    Rolling sum with a window length of 2 observations.

    >>> df.rolling(2).sum()
         B
    0  NaN
    1  1.0
    2  3.0
    3  NaN
    4  NaN

    Rolling sum with a window span of 2 seconds.

    >>> df_time = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]},
    ...                        index = [pd.Timestamp('20130101 09:00:00'),
    ...                                 pd.Timestamp('20130101 09:00:02'),
    ...                                 pd.Timestamp('20130101 09:00:03'),
    ...                                 pd.Timestamp('20130101 09:00:05'),
    ...                                 pd.Timestamp('20130101 09:00:06')])

    >>> df_time
                           B
    2013-01-01 09:00:00  0.0
    2013-01-01 09:00:02  1.0
    2013-01-01 09:00:03  2.0
    2013-01-01 09:00:05  NaN
    2013-01-01 09:00:06  4.0

    >>> df_time.rolling('2s').sum()
                           B
    2013-01-01 09:00:00  0.0
    2013-01-01 09:00:02  1.0
    2013-01-01 09:00:03  3.0
    2013-01-01 09:00:05  NaN
    2013-01-01 09:00:06  4.0

    Rolling sum with forward looking windows with 2 observations.

    >>> indexer = pd.api.indexers.FixedForwardWindowIndexer(window_size=2)
    >>> df.rolling(window=indexer, min_periods=1).sum()
         B
    0  1.0
    1  3.0
    2  2.0
    3  4.0
    4  4.0

    **min_periods**

    Rolling sum with a window length of 2 observations, but only needs a minimum of 1
    observation to calculate a value.

    >>> df.rolling(2, min_periods=1).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  2.0
    4  4.0

    **center**

    Rolling sum with the result assigned to the center of the window index.

    >>> df.rolling(3, min_periods=1, center=True).sum()
         B
    0  1.0
    1  3.0
    2  3.0
    3  6.0
    4  4.0

    >>> df.rolling(3, min_periods=1, center=False).sum()
         B
    0  0.0
    1  1.0
    2  3.0
    3  3.0
    4  6.0

    **step**

    Rolling sum with a window length of 2 observations, minimum of 1 observation to
    calculate a value, and a step of 2.

    >>> df.rolling(2, min_periods=1, step=2).sum()
         B
    0  0.0
    2  3.0
    4  4.0

    **win_type**

    Rolling sum with a window length of 2, using the Scipy ``'gaussian'``
    window type. ``std`` is required in the aggregation function.

    >>> df.rolling(2, win_type='gaussian').sum(std=3)
              B
    0       NaN
    1  0.986207
    2  2.958621
    3       NaN
    4       NaN

    **on**

    Rolling sum with a window length of 2 days.

    >>> df = pd.DataFrame({
    ...     'A': [pd.to_datetime('2020-01-01'),
    ...           pd.to_datetime('2020-01-01'),
    ...           pd.to_datetime('2020-01-02'),],
    ...     'B': [1, 2, 3], },
    ...     index=pd.date_range('2020', periods=3))

    >>> df
                A  B
    2020-01-01 2020-01-01  1
    2020-01-02 2020-01-01  2
    2020-01-03 2020-01-02  3

    >>> df.rolling('2D', on='A').sum()
                A    B
    2020-01-01 2020-01-01  1.0
    2020-01-02 2020-01-01  3.0
    2020-01-03 2020-01-02  6.0
    """

    _attributes = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]

    def _validate(self):
        # Validate ``win_type``/``window`` on top of the base-class checks;
        # resolves the scipy weight generator used by ``_apply``.
        super()._validate()

        if not isinstance(self.win_type, str):
            raise ValueError(f"Invalid win_type {self.win_type}")
        signal = import_optional_dependency(
            "scipy.signal.windows", extra="Scipy is required to generate window weight."
        )
        # Unknown win_type names resolve to None and are rejected below.
        self._scipy_weight_generator = getattr(signal, self.win_type, None)
        if self._scipy_weight_generator is None:
            raise ValueError(f"Invalid win_type {self.win_type}")

        if isinstance(self.window, BaseIndexer):
            raise NotImplementedError(
                "BaseIndexer subclasses not implemented with win_types."
            )
        # Weighted windows require a fixed, non-negative integer length.
        if not is_integer(self.window) or self.window < 0:
            raise ValueError("window must be an integer 0 or greater")

        if self.method != "single":
            raise NotImplementedError("'single' is the only supported method type.")

    def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray:
        """
        Center the result in the window for weighted rolling aggregations.
        """
        # Drop the first ``offset`` entries so labels align with window centers.
        if offset > 0:
            lead_indexer = [slice(offset, None)]
            result = np.copy(result[tuple(lead_indexer)])
        return result

    def _apply(
        self,
        func: Callable[[np.ndarray, int, int], np.ndarray],
        name: str,
        numeric_only: bool = False,
        numba_args: tuple[Any, ...] = (),
        **kwargs,
    ):
        """
        Rolling with weights statistical measure using supplied function.

        Designed to be used with passed-in Cython array-based functions.

        Parameters
        ----------
        func : callable function to apply
        name : str,
        numeric_only : bool, default False
            Whether to only operate on bool, int, and float columns
        numba_args : tuple
            unused
        **kwargs
            additional arguments for scipy windows if necessary

        Returns
        -------
        y : type of input
        """
        # "None" not callable  [misc]
        # Extra kwargs (e.g. ``std`` for gaussian) go to the scipy generator.
        window = self._scipy_weight_generator(  # type: ignore[misc]
            self.window, **kwargs
        )
        offset = (len(window) - 1) // 2 if self.center else 0

        def homogeneous_func(values: np.ndarray):
            # calculation function

            if values.size == 0:
                return values.copy()

            def calc(x):
                # Pad with NaNs so centered results keep the input length
                # before ``_center_window`` trims the lead.
                additional_nans = np.array([np.nan] * offset)
                x = np.concatenate((x, additional_nans))
                return func(x, window, self.min_periods or len(window))

            with np.errstate(all="ignore"):
                # Our weighted aggregations return memoryviews
                result = np.asarray(calc(values))

            if self.center:
                result = self._center_window(result, offset)

            return result

        return self._apply_blockwise(homogeneous_func, name, numeric_only)[:: self.step]

    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.DataFrame.aggregate : Similar DataFrame method.
        pandas.Series.aggregate : Similar Series method.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9

        >>> df.rolling(2, win_type="boxcar").agg("mean")
             A    B    C
        0  NaN  NaN  NaN
        1  1.5  4.5  7.5
        2  2.5  5.5  8.5
        """
        ),
        klass="Series/DataFrame",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # No inline docstring: the @doc decorator supplies it from the template.
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            # these must apply directly
            result = func(self)

        return result

    agg = aggregate

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window sum",
        agg_method="sum",
    )
    def sum(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_weighted_sum
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="sum",
            numeric_only=numeric_only,
            **kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window mean",
        agg_method="mean",
    )
    def mean(self, numeric_only: bool = False, **kwargs):
        window_func = window_aggregations.roll_weighted_mean
        # error: Argument 1 to "_apply" of "Window" has incompatible type
        # "Callable[[ndarray, ndarray, int], ndarray]"; expected
        # "Callable[[ndarray, int, int], ndarray]"
        return self._apply(
            window_func,  # type: ignore[arg-type]
            name="mean",
            numeric_only=numeric_only,
            **kwargs,
        )

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window variance",
        agg_method="var",
    )
    def var(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
        window_func = partial(window_aggregations.roll_weighted_var, ddof=ddof)
        # Drop a stray ``name`` kwarg forwarded by ``std`` below.
        kwargs.pop("name", None)
        return self._apply(window_func, name="var", numeric_only=numeric_only, **kwargs)

    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        kwargs_scipy,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="weighted window standard deviation",
        agg_method="std",
    )
    def std(self, ddof: int = 1, numeric_only: bool = False, **kwargs):
        # std = sqrt(var); ``zsqrt`` handles negative rounding noise.
        return zsqrt(
            self.var(ddof=ddof, name="std", numeric_only=numeric_only, **kwargs)
        )
class RollingAndExpandingMixin(BaseWindow):
    """Shared implementations of the rolling/expanding aggregations."""

    def count(self, numeric_only: bool = False):
        # NOTE(review): implemented as roll_sum; presumably the base
        # ``_apply`` special-cases name="count" to sum a notna mask — confirm
        # in BaseWindow._apply.
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="count", numeric_only=numeric_only)

    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        """
        Apply an arbitrary ``func`` per window, dispatching to a numba-jitted
        kernel (requires ``raw=True``) or the Cython ``roll_apply`` path.
        """
        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        if not is_bool(raw):
            raise ValueError("raw parameter must be `True` or `False`")

        numba_args: tuple[Any, ...] = ()
        if maybe_use_numba(engine):
            if raw is False:
                raise ValueError("raw must be `True` when using the numba engine")
            numba_args = args
            if self.method == "single":
                apply_func = generate_numba_apply_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
            else:
                apply_func = generate_numba_table_func(
                    func, **get_jit_arguments(engine_kwargs, kwargs)
                )
        elif engine in ("cython", None):
            if engine_kwargs is not None:
                raise ValueError("cython engine does not accept engine_kwargs")
            apply_func = self._generate_cython_apply_func(args, kwargs, raw, func)
        else:
            raise ValueError("engine must be either 'numba' or 'cython'")

        return self._apply(
            apply_func,
            name="apply",
            numba_args=numba_args,
        )

    def _generate_cython_apply_func(
        self,
        args: tuple[Any, ...],
        kwargs: dict[str, Any],
        raw: bool,
        function: Callable[..., Any],
    ) -> Callable[[np.ndarray, np.ndarray, np.ndarray, int], np.ndarray]:
        """Build the (values, begin, end, min_periods) callable for roll_apply."""
        from pandas import Series

        window_func = partial(
            window_aggregations.roll_apply,
            args=args,
            kwargs=kwargs,
            raw=raw,
            function=function,
        )

        def apply_func(values, begin, end, min_periods, raw=raw):
            if not raw:
                # GH 45912
                # raw=False hands the user func a Series (indexed by the
                # rolling axis), not a bare ndarray.
                values = Series(values, index=self._on, copy=False)
            return window_func(values, begin, end, min_periods)

        return apply_func

    def sum(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                # table mode has no dedicated kernel; emulate via apply.
                func = generate_manual_numpy_nan_agg_with_axis(np.nansum)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_sum

                return self._numba_apply(sliding_sum, engine_kwargs)
        window_func = window_aggregations.roll_sum
        return self._apply(window_func, name="sum", numeric_only=numeric_only)

    def max(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmax)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # True selects the max branch of the shared min/max kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, True)
        window_func = window_aggregations.roll_max
        return self._apply(window_func, name="max", numeric_only=numeric_only)

    def min(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmin)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_min_max

                # False selects the min branch of the shared min/max kernel.
                return self._numba_apply(sliding_min_max, engine_kwargs, False)
        window_func = window_aggregations.roll_min
        return self._apply(window_func, name="min", numeric_only=numeric_only)

    def mean(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmean)
                return self.apply(
                    func,
                    raw=True,
                    engine=engine,
                    engine_kwargs=engine_kwargs,
                )
            else:
                from pandas.core._numba.kernels import sliding_mean

                return self._numba_apply(sliding_mean, engine_kwargs)
        window_func = window_aggregations.roll_mean
        return self._apply(window_func, name="mean", numeric_only=numeric_only)

    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            # No sliding-median numba kernel; both modes fall back to apply.
            if self.method == "table":
                func = generate_manual_numpy_nan_agg_with_axis(np.nanmedian)
            else:
                func = np.nanmedian

            return self.apply(
                func,
                raw=True,
                engine=engine,
                engine_kwargs=engine_kwargs,
            )
        window_func = window_aggregations.roll_median_c
        return self._apply(window_func, name="median", numeric_only=numeric_only)

    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("std not supported with method='table'")
            from pandas.core._numba.kernels import sliding_var

            # std = sqrt(var); zsqrt clips negative rounding noise.
            return zsqrt(self._numba_apply(sliding_var, engine_kwargs, ddof))
        window_func = window_aggregations.roll_var

        def zsqrt_func(values, begin, end, min_periods):
            return zsqrt(window_func(values, begin, end, min_periods, ddof=ddof))

        return self._apply(
            zsqrt_func,
            name="std",
            numeric_only=numeric_only,
        )

    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        if maybe_use_numba(engine):
            if self.method == "table":
                raise NotImplementedError("var not supported with method='table'")
            from pandas.core._numba.kernels import sliding_var

            return self._numba_apply(sliding_var, engine_kwargs, ddof)
        window_func = partial(window_aggregations.roll_var, ddof=ddof)
        return self._apply(
            window_func,
            name="var",
            numeric_only=numeric_only,
        )

    def skew(self, numeric_only: bool = False):
        window_func = window_aggregations.roll_skew
        return self._apply(
            window_func,
            name="skew",
            numeric_only=numeric_only,
        )

    def sem(self, ddof: int = 1, numeric_only: bool = False):
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # Standard error of the mean: std / sqrt(count - ddof).
        return self.std(numeric_only=numeric_only) / (
            self.count(numeric_only=numeric_only) - ddof
        ).pow(0.5)

    def kurt(self, numeric_only: bool = False):
        window_func = window_aggregations.roll_kurt
        return self._apply(
            window_func,
            name="kurt",
            numeric_only=numeric_only,
        )

    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        # Edge quantiles short-circuit to the cheaper max/min kernels.
        if quantile == 1.0:
            window_func = window_aggregations.roll_max
        elif quantile == 0.0:
            window_func = window_aggregations.roll_min
        else:
            window_func = partial(
                window_aggregations.roll_quantile,
                quantile=quantile,
                interpolation=interpolation,
            )

        return self._apply(window_func, name="quantile", numeric_only=numeric_only)

    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        window_func = partial(
            window_aggregations.roll_rank,
            method=method,
            ascending=ascending,
            percentile=pct,
        )

        return self._apply(window_func, name="rank", numeric_only=numeric_only)

    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for cov")
        self._validate_numeric_only("cov", numeric_only)

        from pandas import Series

        def cov_func(x, y):
            # cov(x, y) = (E[xy] - E[x]E[y]) scaled by the per-window
            # bias-correction factor count / (count - ddof).
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                # min_periods=0: pairwise-valid observation count per window.
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof))
            return Series(result, index=x.index, name=x.name, copy=False)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, cov_func, numeric_only
        )

    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        if self.step is not None:
            raise NotImplementedError("step not implemented for corr")
        self._validate_numeric_only("corr", numeric_only)

        from pandas import Series

        def corr_func(x, y):
            # corr(x, y) = cov(x, y) / sqrt(var(x) * var(y)), all computed
            # per window with the same bounds and min_periods.
            x_array = self._prep_values(x)
            y_array = self._prep_values(y)
            window_indexer = self._get_window_indexer()
            min_periods = (
                self.min_periods
                if self.min_periods is not None
                else window_indexer.window_size
            )
            start, end = window_indexer.get_window_bounds(
                num_values=len(x_array),
                min_periods=min_periods,
                center=self.center,
                closed=self.closed,
                step=self.step,
            )
            self._check_window_bounds(start, end, len(x_array))

            with np.errstate(all="ignore"):
                mean_x_y = window_aggregations.roll_mean(
                    x_array * y_array, start, end, min_periods
                )
                mean_x = window_aggregations.roll_mean(x_array, start, end, min_periods)
                mean_y = window_aggregations.roll_mean(y_array, start, end, min_periods)
                count_x_y = window_aggregations.roll_sum(
                    notna(x_array + y_array).astype(np.float64), start, end, 0
                )
                x_var = window_aggregations.roll_var(
                    x_array, start, end, min_periods, ddof
                )
                y_var = window_aggregations.roll_var(
                    y_array, start, end, min_periods, ddof
                )
                numerator = (mean_x_y - mean_x * mean_y) * (
                    count_x_y / (count_x_y - ddof)
                )
                denominator = (x_var * y_var) ** 0.5
                result = numerator / denominator
            return Series(result, index=x.index, name=x.name, copy=False)

        return self._apply_pairwise(
            self._selected_obj, other, pairwise, corr_func, numeric_only
        )
class Rolling(RollingAndExpandingMixin):
    # NOTE(review): presumably the constructor keyword names preserved when a
    # window object is re-created (e.g. via groupby) — confirm against the
    # _attributes machinery in the base class.
    _attributes: list[str] = [
        "window",
        "min_periods",
        "center",
        "win_type",
        "axis",
        "on",
        "closed",
        "step",
        "method",
    ]
    def _validate(self):
        """
        Validate the rolling constructor arguments.

        Frequency-based windows (a str/offset/timedelta over a datetimelike
        index, or on an empty frame) are converted to an integer nanosecond
        window size stored in ``self._win_freq_i8``; integer windows must be
        non-negative integers; ``BaseIndexer`` windows are passed through.

        Raises
        ------
        ValueError
            If the window is incompatible with the index, or not a valid
            non-negative integer.
        NotImplementedError
            If ``step`` is combined with a frequency window.
        """
        super()._validate()
        # we allow rolling on a datetimelike index
        if (
            self.obj.empty
            or isinstance(self._on, (DatetimeIndex, TimedeltaIndex, PeriodIndex))
        ) and isinstance(self.window, (str, BaseOffset, timedelta)):
            self._validate_datetimelike_monotonic()
            # this will raise ValueError on non-fixed freqs
            try:
                freq = to_offset(self.window)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"passed window {self.window} is not "
                    "compatible with a datetimelike index"
                ) from err
            if isinstance(self._on, PeriodIndex):
                # For PeriodIndex, scale the window by the index's own
                # per-period duration.
                # error: Incompatible types in assignment (expression has type
                # "float", variable has type "Optional[int]")
                self._win_freq_i8 = freq.nanos / (  # type: ignore[assignment]
                    self._on.freq.nanos / self._on.freq.n
                )
            else:
                self._win_freq_i8 = freq.nanos
            # min_periods must be an integer
            if self.min_periods is None:
                self.min_periods = 1
            if self.step is not None:
                raise NotImplementedError(
                    "step is not supported with frequency windows"
                )
        elif isinstance(self.window, BaseIndexer):
            # Passed BaseIndexer subclass should handle all other rolling kwargs
            pass
        elif not is_integer(self.window) or self.window < 0:
            raise ValueError("window must be an integer 0 or greater")
  1560. def _validate_datetimelike_monotonic(self) -> None:
  1561. """
  1562. Validate self._on is monotonic (increasing or decreasing) and has
  1563. no NaT values for frequency windows.
  1564. """
  1565. if self._on.hasnans:
  1566. self._raise_monotonic_error("values must not have NaT")
  1567. if not (self._on.is_monotonic_increasing or self._on.is_monotonic_decreasing):
  1568. self._raise_monotonic_error("values must be monotonic")
  1569. def _raise_monotonic_error(self, msg: str):
  1570. on = self.on
  1571. if on is None:
  1572. if self.axis == 0:
  1573. on = "index"
  1574. else:
  1575. on = "column"
  1576. raise ValueError(f"{on} {msg}")
    @doc(
        _shared_docs["aggregate"],
        see_also=dedent(
            """
        See Also
        --------
        pandas.Series.rolling : Calling object with Series data.
        pandas.DataFrame.rolling : Calling object with DataFrame data.
        """
        ),
        examples=dedent(
            """
        Examples
        --------
        >>> df = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]})
        >>> df
           A  B  C
        0  1  4  7
        1  2  5  8
        2  3  6  9
        >>> df.rolling(2).sum()
             A     B     C
        0  NaN   NaN   NaN
        1  3.0   9.0  15.0
        2  5.0  11.0  17.0
        >>> df.rolling(2).agg({"A": "sum", "B": "min"})
             A    B
        0  NaN  NaN
        1  3.0  4.0
        2  5.0  5.0
        """
        ),
        klass="Series/Dataframe",
        axis="",
    )
    def aggregate(self, func, *args, **kwargs):
        # Thin wrapper: delegate to the shared window aggregate implementation.
        return super().aggregate(func, *args, **kwargs)
    # Alias kept for the public ``agg`` spelling.
    agg = aggregate
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([2, 3, np.nan, 10])
        >>> s.rolling(2).count()
        0    NaN
        1    2.0
        2    1.0
        3    1.0
        dtype: float64
        >>> s.rolling(3).count()
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        dtype: float64
        >>> s.rolling(4).count()
        0    NaN
        1    NaN
        2    NaN
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="count of non NaN observations",
        agg_method="count",
    )
    def count(self, numeric_only: bool = False):
        # Delegate to the shared implementation in RollingAndExpandingMixin.
        return super().count(numeric_only)
    @doc(
        template_header,
        create_section_header("Parameters"),
        window_apply_parameters,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="custom aggregation function",
        agg_method="apply",
    )
    def apply(
        self,
        func: Callable[..., Any],
        raw: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        args: tuple[Any, ...] | None = None,
        kwargs: dict[str, Any] | None = None,
    ):
        # Delegate to the shared implementation; all keywords forwarded.
        return super().apply(
            func,
            raw=raw,
            engine=engine,
            engine_kwargs=engine_kwargs,
            args=args,
            kwargs=kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4, 5])
        >>> s
        0    1
        1    2
        2    3
        3    4
        4    5
        dtype: int64
        >>> s.rolling(3).sum()
        0     NaN
        1     NaN
        2     6.0
        3     9.0
        4    12.0
        dtype: float64
        >>> s.rolling(3, center=True).sum()
        0     NaN
        1     6.0
        2     9.0
        3    12.0
        4     NaN
        dtype: float64
        For DataFrame, each sum is computed column-wise.
        >>> df = pd.DataFrame({{"A": s, "B": s ** 2}})
        >>> df
           A   B
        0  1   1
        1  2   4
        2  3   9
        3  4  16
        4  5  25
        >>> df.rolling(3).sum()
              A     B
        0   NaN   NaN
        1   NaN   NaN
        2   6.0  14.0
        3   9.0  29.0
        4  12.0  50.0
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="sum",
        agg_method="sum",
    )
    def sum(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; engine kwargs forwarded.
        return super().sum(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes[:-1],
        window_method="rolling",
        aggregation_description="maximum",
        agg_method="max",
    )
    def max(
        self,
        numeric_only: bool = False,
        *args,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
        **kwargs,
    ):
        # Note: *args/**kwargs are accepted but not forwarded below —
        # NOTE(review): presumably retained for backward compatibility;
        # confirm, since they are silently ignored.
        return super().max(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Performing a rolling minimum with a window size of 3.
        >>> s = pd.Series([4, 3, 5, 2, 6])
        >>> s.rolling(3).min()
        0    NaN
        1    NaN
        2    3.0
        3    2.0
        4    2.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="minimum",
        agg_method="min",
    )
    def min(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; engine kwargs forwarded.
        return super().min(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        The below examples will show rolling mean calculations with window sizes of
        two and three, respectively.
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).mean()
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64
        >>> s.rolling(3).mean()
        0    NaN
        1    NaN
        2    2.0
        3    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="mean",
        agg_method="mean",
    )
    def mean(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; engine kwargs forwarded.
        return super().mean(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        window_agg_numba_parameters(),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        numba_notes,
        create_section_header("Examples"),
        dedent(
            """
        Compute the rolling median of a series with a window size of 3.
        >>> s = pd.Series([0, 1, 2, 3, 4])
        >>> s.rolling(3).median()
        0    NaN
        1    NaN
        2    1.0
        3    2.0
        4    3.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="median",
        agg_method="median",
    )
    def median(
        self,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; engine kwargs forwarded.
        return super().median(
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.std : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.std` is different
        than the default ``ddof`` of 0 in :func:`numpy.std`.
        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).std()
        0         NaN
        1         NaN
        2    0.577350
        3    1.000000
        4    1.000000
        5    1.154701
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard deviation",
        agg_method="std",
    )
    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; ddof/engine kwargs forwarded.
        return super().std(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        window_agg_numba_parameters("1.4"),
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "numpy.var : Equivalent method for NumPy array.\n",
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        The default ``ddof`` of 1 used in :meth:`Series.var` is different
        than the default ``ddof`` of 0 in :func:`numpy.var`.
        A minimum of one period is required for the rolling calculation.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([5, 5, 6, 7, 5, 5, 5])
        >>> s.rolling(3).var()
        0         NaN
        1         NaN
        2    0.333333
        3    1.000000
        4    1.000000
        5    1.333333
        6    0.000000
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="variance",
        agg_method="var",
    )
    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        engine: str | None = None,
        engine_kwargs: dict[str, bool] | None = None,
    ):
        # Delegate to the shared implementation; ddof/engine kwargs forwarded.
        return super().var(
            ddof=ddof,
            numeric_only=numeric_only,
            engine=engine,
            engine_kwargs=engine_kwargs,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.skew : Third moment of a probability density.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of three periods is required for the rolling calculation.\n",
        window_method="rolling",
        aggregation_description="unbiased skewness",
        agg_method="skew",
    )
    def skew(self, numeric_only: bool = False):
        # Delegate to the shared implementation in RollingAndExpandingMixin.
        return super().skew(numeric_only=numeric_only)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Notes"),
        "A minimum of one period is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([0, 1, 2, 3])
        >>> s.rolling(2, min_periods=1).sem()
        0         NaN
        1    0.707107
        2    0.707107
        3    0.707107
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="standard error of mean",
        agg_method="sem",
    )
    def sem(self, ddof: int = 1, numeric_only: bool = False):
        # Raise here so error message says sem instead of std
        self._validate_numeric_only("sem", numeric_only)
        # Standard error of the mean: rolling std / sqrt(count - ddof).
        return self.std(numeric_only=numeric_only) / (
            self.count(numeric_only) - ddof
        ).pow(0.5)
    @doc(
        template_header,
        create_section_header("Parameters"),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        "scipy.stats.kurtosis : Reference SciPy method.\n",
        template_see_also,
        create_section_header("Notes"),
        "A minimum of four periods is required for the calculation.\n\n",
        create_section_header("Examples"),
        dedent(
            """
        The example below will show a rolling calculation with a window size of
        four matching the equivalent function call using `scipy.stats`.
        >>> arr = [1, 2, 3, 4, 999]
        >>> import scipy.stats
        >>> print(f"{{scipy.stats.kurtosis(arr[:-1], bias=False):.6f}}")
        -1.200000
        >>> print(f"{{scipy.stats.kurtosis(arr[1:], bias=False):.6f}}")
        3.999946
        >>> s = pd.Series(arr)
        >>> s.rolling(4).kurt()
        0         NaN
        1         NaN
        2         NaN
        3   -1.200000
        4    3.999946
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="Fisher's definition of kurtosis without bias",
        agg_method="kurt",
    )
    def kurt(self, numeric_only: bool = False):
        # Delegate to the shared implementation in RollingAndExpandingMixin.
        return super().kurt(numeric_only=numeric_only)
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        quantile : float
            Quantile to compute. 0 <= quantile <= 1.
        interpolation : {{'linear', 'lower', 'higher', 'midpoint', 'nearest'}}
            This optional parameter specifies the interpolation method to use,
            when the desired quantile lies between two data points `i` and `j`:
                * linear: `i + (j - i) * fraction`, where `fraction` is the
                  fractional part of the index surrounded by `i` and `j`.
                * lower: `i`.
                * higher: `j`.
                * nearest: `i` or `j` whichever is nearest.
                * midpoint: (`i` + `j`) / 2.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 2, 3, 4])
        >>> s.rolling(2).quantile(.4, interpolation='lower')
        0    NaN
        1    1.0
        2    2.0
        3    3.0
        dtype: float64
        >>> s.rolling(2).quantile(.4, interpolation='midpoint')
        0    NaN
        1    1.5
        2    2.5
        3    3.5
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="quantile",
        agg_method="quantile",
    )
    def quantile(
        self,
        quantile: float,
        interpolation: QuantileInterpolation = "linear",
        numeric_only: bool = False,
    ):
        # Delegate to the shared implementation; all keywords forwarded.
        return super().quantile(
            quantile=quantile,
            interpolation=interpolation,
            numeric_only=numeric_only,
        )
    @doc(
        template_header,
        ".. versionadded:: 1.4.0 \n\n",
        create_section_header("Parameters"),
        dedent(
            """
        method : {{'average', 'min', 'max'}}, default 'average'
            How to rank the group of records that have the same value (i.e. ties):
                * average: average rank of the group
                * min: lowest rank in the group
                * max: highest rank in the group
        ascending : bool, default True
            Whether or not the elements should be ranked in ascending order.
        pct : bool, default False
            Whether or not to display the returned rankings in percentile
            form.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also,
        create_section_header("Examples"),
        dedent(
            """
        >>> s = pd.Series([1, 4, 2, 3, 5, 3])
        >>> s.rolling(3).rank()
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.5
        dtype: float64
        >>> s.rolling(3).rank(method="max")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    2.0
        dtype: float64
        >>> s.rolling(3).rank(method="min")
        0    NaN
        1    NaN
        2    2.0
        3    2.0
        4    3.0
        5    1.0
        dtype: float64
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="rank",
        agg_method="rank",
    )
    def rank(
        self,
        method: WindowingRankType = "average",
        ascending: bool = True,
        pct: bool = False,
        numeric_only: bool = False,
    ):
        # Delegate to the shared implementation; all keywords forwarded.
        return super().rank(
            method=method,
            ascending=ascending,
            pct=pct,
            numeric_only=numeric_only,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        template_see_also[:-1],
        window_method="rolling",
        aggregation_description="sample covariance",
        agg_method="cov",
    )
    def cov(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        # Delegate to the shared implementation; all keywords forwarded.
        return super().cov(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )
    @doc(
        template_header,
        create_section_header("Parameters"),
        dedent(
            """
        other : Series or DataFrame, optional
            If not supplied then will default to self and produce pairwise
            output.
        pairwise : bool, default None
            If False then only matching columns between self and other will be
            used and the output will be a DataFrame.
            If True then all pairwise combinations will be calculated and the
            output will be a MultiIndexed DataFrame in the case of DataFrame
            inputs. In the case of missing elements, only complete pairwise
            observations will be used.
        ddof : int, default 1
            Delta Degrees of Freedom.  The divisor used in calculations
            is ``N - ddof``, where ``N`` represents the number of elements.
        """
        ).replace("\n", "", 1),
        kwargs_numeric_only,
        create_section_header("Returns"),
        template_returns,
        create_section_header("See Also"),
        dedent(
            """
        cov : Similar method to calculate covariance.
        numpy.corrcoef : NumPy Pearson's correlation calculation.
        """
        ).replace("\n", "", 1),
        template_see_also,
        create_section_header("Notes"),
        dedent(
            """
        This function uses Pearson's definition of correlation
        (https://en.wikipedia.org/wiki/Pearson_correlation_coefficient).
        When `other` is not specified, the output will be self correlation (e.g.
        all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
        set to `True`.
        Function will return ``NaN`` for correlations of equal valued sequences;
        this is the result of a 0/0 division error.
        When `pairwise` is set to `False`, only matching columns between `self` and
        `other` will be used.
        When `pairwise` is set to `True`, the output will be a MultiIndex DataFrame
        with the original index on the first level, and the `other` DataFrame
        columns on the second level.
        In the case of missing elements, only complete pairwise observations
        will be used.\n
        """
        ).replace("\n", "", 1),
        create_section_header("Examples"),
        dedent(
            """
        The below example shows a rolling calculation with a window size of
        four matching the equivalent function call using :meth:`numpy.corrcoef`.
        >>> v1 = [3, 3, 3, 5, 8]
        >>> v2 = [3, 4, 4, 4, 8]
        >>> # numpy returns a 2X2 array, the correlation coefficient
        >>> # is the number at entry [0][1]
        >>> print(f"{{np.corrcoef(v1[:-1], v2[:-1])[0][1]:.6f}}")
        0.333333
        >>> print(f"{{np.corrcoef(v1[1:], v2[1:])[0][1]:.6f}}")
        0.916949
        >>> s1 = pd.Series(v1)
        >>> s2 = pd.Series(v2)
        >>> s1.rolling(4).corr(s2)
        0         NaN
        1         NaN
        2         NaN
        3    0.333333
        4    0.916949
        dtype: float64
        The below example shows a similar rolling calculation on a
        DataFrame using the pairwise option.
        >>> matrix = np.array([[51., 35.], [49., 30.], [47., 32.],\
        [46., 31.], [50., 36.]])
        >>> print(np.corrcoef(matrix[:-1,0], matrix[:-1,1]).round(7))
        [[1.        0.6263001]
         [0.6263001 1.       ]]
        >>> print(np.corrcoef(matrix[1:,0], matrix[1:,1]).round(7))
        [[1.        0.5553681]
         [0.5553681 1.       ]]
        >>> df = pd.DataFrame(matrix, columns=['X','Y'])
        >>> df
              X     Y
        0  51.0  35.0
        1  49.0  30.0
        2  47.0  32.0
        3  46.0  31.0
        4  50.0  36.0
        >>> df.rolling(4).corr(pairwise=True)
                    X         Y
        0 X       NaN       NaN
          Y       NaN       NaN
        1 X       NaN       NaN
          Y       NaN       NaN
        2 X       NaN       NaN
          Y       NaN       NaN
        3 X  1.000000  0.626300
          Y  0.626300  1.000000
        4 X  1.000000  0.555368
          Y  0.555368  1.000000
        """
        ).replace("\n", "", 1),
        window_method="rolling",
        aggregation_description="correlation",
        agg_method="corr",
    )
    def corr(
        self,
        other: DataFrame | Series | None = None,
        pairwise: bool | None = None,
        ddof: int = 1,
        numeric_only: bool = False,
    ):
        # Delegate to the shared implementation; all keywords forwarded.
        return super().corr(
            other=other,
            pairwise=pairwise,
            ddof=ddof,
            numeric_only=numeric_only,
        )
# Reuse the user-facing docstring defined on ``Window`` for ``Rolling``.
Rolling.__doc__ = Window.__doc__
class RollingGroupby(BaseWindowGroupby, Rolling):
    """
    Provide a rolling groupby implementation.
    """
    # Combine the attributes preserved by both parent classes.
    _attributes = Rolling._attributes + BaseWindowGroupby._attributes
    def _get_window_indexer(self) -> GroupbyIndexer:
        """
        Return an indexer class that will compute the window start and end bounds

        Returns
        -------
        GroupbyIndexer
        """
        rolling_indexer: type[BaseIndexer]
        indexer_kwargs: dict[str, Any] | None = None
        index_array = self._index_array
        if isinstance(self.window, BaseIndexer):
            # Re-instantiate the user's indexer class per group with the
            # same constructor state.
            rolling_indexer = type(self.window)
            indexer_kwargs = self.window.__dict__.copy()
            assert isinstance(indexer_kwargs, dict)  # for mypy
            # We'll be using the index of each group later
            indexer_kwargs.pop("index_array", None)
            window = self.window
        elif self._win_freq_i8 is not None:
            # Frequency window: variable-size windows over the index values.
            rolling_indexer = VariableWindowIndexer
            # error: Incompatible types in assignment (expression has type
            # "int", variable has type "BaseIndexer")
            window = self._win_freq_i8  # type: ignore[assignment]
        else:
            # Plain integer window.
            rolling_indexer = FixedWindowIndexer
            window = self.window
        window_indexer = GroupbyIndexer(
            index_array=index_array,
            window_size=window,
            groupby_indices=self._grouper.indices,
            window_indexer=rolling_indexer,
            indexer_kwargs=indexer_kwargs,
        )
        return window_indexer
    def _validate_datetimelike_monotonic(self):
        """
        Validate that each group in self._on is monotonic
        """
        # GH 46061
        if self._on.hasnans:
            self._raise_monotonic_error("values must not have NaT")
        # Monotonicity is only required within each group, not globally.
        for group_indices in self._grouper.indices.values():
            group_on = self._on.take(group_indices)
            if not (
                group_on.is_monotonic_increasing or group_on.is_monotonic_decreasing
            ):
                on = "index" if self.on is None else self.on
                raise ValueError(
                    f"Each group within {on} must be monotonic. "
                    f"Sort the values in {on} first."
                )