from __future__ import annotations

import copy
from textwrap import dedent
from typing import (
    TYPE_CHECKING,
    Callable,
    Hashable,
    Literal,
    cast,
    final,
    no_type_check,
)
import warnings

import numpy as np

from pandas._libs import lib
from pandas._libs.tslibs import (
    BaseOffset,
    IncompatibleFrequency,
    NaT,
    Period,
    Timedelta,
    Timestamp,
    to_offset,
)
from pandas._typing import (
    AnyArrayLike,
    Axis,
    AxisInt,
    Frequency,
    IndexLabel,
    NDFrameT,
    QuantileInterpolation,
    T,
    TimedeltaConvertibleTypes,
    TimeGrouperOrigin,
    TimestampConvertibleTypes,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
from pandas.util._decorators import (
    Appender,
    Substitution,
    doc,
)
from pandas.util._exceptions import find_stack_level

from pandas.core.dtypes.generic import (
    ABCDataFrame,
    ABCSeries,
)

import pandas.core.algorithms as algos
from pandas.core.apply import ResamplerWindowApply
from pandas.core.base import PandasObject
import pandas.core.common as com
from pandas.core.generic import (
    NDFrame,
    _shared_docs,
)
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.core.groupby.groupby import (
    BaseGroupBy,
    GroupBy,
    _pipe_template,
    get_groupby,
)
from pandas.core.groupby.grouper import Grouper
from pandas.core.groupby.ops import BinGrouper
from pandas.core.indexes.datetimes import (
    DatetimeIndex,
    date_range,
)
from pandas.core.indexes.period import (
    PeriodIndex,
    period_range,
)
from pandas.core.indexes.timedeltas import (
    TimedeltaIndex,
    timedelta_range,
)
from pandas.tseries.frequencies import (
    is_subperiod,
    is_superperiod,
)
from pandas.tseries.offsets import (
    Day,
    Tick,
)

if TYPE_CHECKING:
    from pandas import (
        DataFrame,
        Index,
        Series,
    )

_shared_docs_kwargs: dict[str, str] = {}
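
# Added overview (editor's note, not upstream commentary): ``Resampler`` below
# carries the shared machinery; ``DatetimeIndexResampler``,
# ``PeriodIndexResampler`` and ``TimedeltaIndexResampler`` specialize bin
# creation for their index type, and the ``*ResamplerGroupby`` variants mix in
# ``_GroupByMixin`` so that ``df.groupby(...).resample(...)`` works.
# ``TimeGrouper`` (further down) builds the bins and picks the subclass.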
class Resampler(BaseGroupBy, PandasObject):
    """
    Class for resampling datetimelike data, a groupby-like operation.
    See aggregate, transform, and apply functions on this object.

    It's easiest to use obj.resample(...) to use Resampler.

    Parameters
    ----------
    obj : Series or DataFrame
    groupby : TimeGrouper
    axis : int, default 0
    kind : str or None
        'period', 'timestamp' to override default index treatment

    Returns
    -------
    a Resampler of the appropriate type

    Notes
    -----
    After resampling, see aggregate, apply, and transform functions.
    """

    grouper: BinGrouper
    _timegrouper: TimeGrouper
    binner: DatetimeIndex | TimedeltaIndex | PeriodIndex  # depends on subclass
    exclusions: frozenset[Hashable] = frozenset()  # for SelectionMixin compat
    _internal_names_set = set({"obj", "ax", "_indexer"})

    # to the groupby descriptor
    _attributes = [
        "freq",
        "axis",
        "closed",
        "label",
        "convention",
        "kind",
        "origin",
        "offset",
    ]

    def __init__(
        self,
        obj: NDFrame,
        timegrouper: TimeGrouper,
        axis: Axis = 0,
        kind=None,
        *,
        gpr_index: Index,
        group_keys: bool = False,
        selection=None,
    ) -> None:
        self._timegrouper = timegrouper
        self.keys = None
        self.sort = True
        self.axis = obj._get_axis_number(axis)
        self.kind = kind
        self.group_keys = group_keys
        self.as_index = True

        self.obj, self.ax, self._indexer = self._timegrouper._set_grouper(
            self._convert_obj(obj), sort=True, gpr_index=gpr_index
        )
        self.binner, self.grouper = self._get_binner()
        self._selection = selection
        if self._timegrouper.key is not None:
            self.exclusions = frozenset([self._timegrouper.key])
        else:
            self.exclusions = frozenset()

    def __str__(self) -> str:
        """
        Provide a nice str repr of our rolling object.
        """
        attrs = (
            f"{k}={getattr(self._timegrouper, k)}"
            for k in self._attributes
            if getattr(self._timegrouper, k, None) is not None
        )
        return f"{type(self).__name__} [{', '.join(attrs)}]"

    def __getattr__(self, attr: str):
        if attr in self._internal_names_set:
            return object.__getattribute__(self, attr)
        if attr in self._attributes:
            return getattr(self._timegrouper, attr)
        if attr in self.obj:
            return self[attr]

        return object.__getattribute__(self, attr)

    @property
    def _from_selection(self) -> bool:
        """
        Is the resampling from a DataFrame column or MultiIndex level.
        """
        # upsampling and PeriodIndex resampling do not work
        # with selection, this state used to catch and raise an error
        return self._timegrouper is not None and (
            self._timegrouper.key is not None or self._timegrouper.level is not None
        )

    def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
        """
        Provide any conversions for the object in order to correctly handle.

        Parameters
        ----------
        obj : Series or DataFrame

        Returns
        -------
        Series or DataFrame
        """
        return obj._consolidate()

    def _get_binner_for_time(self):
        raise AbstractMethodError(self)

    @final
    def _get_binner(self):
        """
        Create the BinGrouper, assume that self.set_grouper(obj)
        has already been called.
        """
        binner, bins, binlabels = self._get_binner_for_time()
        assert len(bins) == len(binlabels)
        bin_grouper = BinGrouper(bins, binlabels, indexer=self._indexer)
        return binner, bin_grouper
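
    # Added note: ``_get_binner_for_time`` returns three aligned pieces --
    # ``binner`` (the index of bin edges), ``bins`` (for each bin, the integer
    # position in the sorted data where it ends) and ``binlabels`` (the label
    # each bin gets in the result).  ``BinGrouper`` then groups by position
    # over the sorted data rather than by hashing index values, which is
    # roughly why resampling can be cheaper than a generic groupby.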
    @Substitution(
        klass="Resampler",
        examples="""
    >>> df = pd.DataFrame({'A': [1, 2, 3, 4]},
    ...                   index=pd.date_range('2012-08-02', periods=4))
    >>> df
                A
    2012-08-02  1
    2012-08-03  2
    2012-08-04  3
    2012-08-05  4

    To get the difference between each 2-day period's maximum and minimum
    value in one pass, you can do

    >>> df.resample('2D').pipe(lambda x: x.max() - x.min())
                A
    2012-08-02  1
    2012-08-04  1""",
    )
    @Appender(_pipe_template)
    def pipe(
        self,
        func: Callable[..., T] | tuple[Callable[..., T], str],
        *args,
        **kwargs,
    ) -> T:
        return super().pipe(func, *args, **kwargs)

    _agg_see_also_doc = dedent(
        """
    See Also
    --------
    DataFrame.groupby.aggregate : Aggregate using callable, string, dict,
        or list of string/callables.
    DataFrame.resample.transform : Transforms the Series on each group
        based on the given function.
    DataFrame.aggregate: Aggregate using one or more
        operations over the specified axis.
    """
    )

    _agg_examples_doc = dedent(
        """
    Examples
    --------
    >>> s = pd.Series([1, 2, 3, 4, 5],
    ...               index=pd.date_range('20130101', periods=5, freq='s'))
    >>> s
    2013-01-01 00:00:00    1
    2013-01-01 00:00:01    2
    2013-01-01 00:00:02    3
    2013-01-01 00:00:03    4
    2013-01-01 00:00:04    5
    Freq: S, dtype: int64

    >>> r = s.resample('2s')

    >>> r.agg(np.sum)
    2013-01-01 00:00:00    3
    2013-01-01 00:00:02    7
    2013-01-01 00:00:04    5
    Freq: 2S, dtype: int64

    >>> r.agg(['sum', 'mean', 'max'])
                         sum  mean  max
    2013-01-01 00:00:00    3   1.5    2
    2013-01-01 00:00:02    7   3.5    4
    2013-01-01 00:00:04    5   5.0    5

    >>> r.agg({'result': lambda x: x.mean() / x.std(),
    ...        'total': np.sum})
                           result  total
    2013-01-01 00:00:00  2.121320      3
    2013-01-01 00:00:02  4.949747      7
    2013-01-01 00:00:04       NaN      5

    >>> r.agg(average="mean", total="sum")
                         average  total
    2013-01-01 00:00:00      1.5      3
    2013-01-01 00:00:02      3.5      7
    2013-01-01 00:00:04      5.0      5
    """
    )

    @doc(
        _shared_docs["aggregate"],
        see_also=_agg_see_also_doc,
        examples=_agg_examples_doc,
        klass="DataFrame",
        axis="",
    )
    def aggregate(self, func=None, *args, **kwargs):
        result = ResamplerWindowApply(self, func, args=args, kwargs=kwargs).agg()
        if result is None:
            how = func
            result = self._groupby_and_aggregate(how, *args, **kwargs)

        return result

    agg = aggregate
    apply = aggregate
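
    # Added note: on a Resampler, ``apply`` is bound to ``aggregate`` (see the
    # aliases above), unlike ``GroupBy.apply``.  So, for example,
    # ``s.resample('2s').apply('sum')`` and ``s.resample('2s').agg('sum')``
    # are the same call.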
    def transform(self, arg, *args, **kwargs):
        """
        Call function producing a like-indexed Series on each group.

        Return a Series with the transformed values.

        Parameters
        ----------
        arg : function
            To apply to each group. Should return a Series with the same index.

        Returns
        -------
        Series

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: H, dtype: int64

        >>> resampled = s.resample('15min')
        >>> resampled.transform(lambda x: (x - x.mean()) / x.std())
        2018-01-01 00:00:00   NaN
        2018-01-01 01:00:00   NaN
        Freq: H, dtype: float64
        """
        return self._selected_obj.groupby(self._timegrouper).transform(
            arg, *args, **kwargs
        )

    def _downsample(self, f, **kwargs):
        raise AbstractMethodError(self)

    def _upsample(self, f, limit=None, fill_value=None):
        raise AbstractMethodError(self)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        grouper = self.grouper
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None and ndim=1
                assert subset.ndim == 1
        if ndim == 1:
            assert subset.ndim == 1

        grouped = get_groupby(
            subset, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )
        return grouped

    def _groupby_and_aggregate(self, how, *args, **kwargs):
        """
        Re-evaluate the obj with a groupby aggregation.
        """
        grouper = self.grouper

        if self._selected_obj.ndim == 1:
            obj = self._selected_obj
        else:
            # Excludes `on` column when provided
            obj = self._obj_with_exclusions
        grouped = get_groupby(
            obj, by=None, grouper=grouper, axis=self.axis, group_keys=self.group_keys
        )

        try:
            if callable(how):
                # TODO: test_resample_apply_with_additional_args fails if we go
                #  through the non-lambda path, not clear that it should.
                func = lambda x: how(x, *args, **kwargs)
                result = grouped.aggregate(func)
            else:
                result = grouped.aggregate(how, *args, **kwargs)
        except (AttributeError, KeyError):
            # we have a non-reducing function; try to evaluate
            # alternatively we want to evaluate only a column of the input

            # test_apply_to_one_column_of_df the function being applied references
            #  a DataFrame column, but aggregate_item_by_item operates column-wise
            #  on Series, raising AttributeError or KeyError
            #  (depending on whether the column lookup uses getattr/__getitem__)
            result = grouped.apply(how, *args, **kwargs)

        except ValueError as err:
            if "Must produce aggregated value" in str(err):
                # raised in _aggregate_named
                # see test_apply_without_aggregation, test_apply_with_mutated_index
                pass
            else:
                raise

            # we have a non-reducing function
            # try to evaluate
            result = grouped.apply(how, *args, **kwargs)

        return self._wrap_result(result)
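
    # Added note: the fallback chain above is deliberate.  A reducing ``how``
    # goes through ``grouped.aggregate``; if that raises (AttributeError,
    # KeyError, or the "Must produce aggregated value" ValueError), the
    # function is treated as non-reducing and re-run via ``grouped.apply``,
    # which keeps whatever shape the function returns per group.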
    def _get_resampler_for_grouping(self, groupby: GroupBy, key):
        """
        Return the correct class for resampling with groupby.
        """
        return self._resampler_for_grouping(groupby=groupby, key=key, parent=self)

    def _wrap_result(self, result):
        """
        Potentially wrap any results.
        """
        # GH 47705
        obj = self.obj
        if (
            isinstance(result, ABCDataFrame)
            and len(result) == 0
            and not isinstance(result.index, PeriodIndex)
        ):
            result = result.set_index(
                _asfreq_compat(obj.index[:0], freq=self.freq), append=True
            )

        if isinstance(result, ABCSeries) and self._selection is not None:
            result.name = self._selection

        if isinstance(result, ABCSeries) and result.empty:
            # When index is all NaT, result is empty but index is not
            result.index = _asfreq_compat(obj.index[:0], freq=self.freq)
            result.name = getattr(obj, "name", None)

        return result

    def ffill(self, limit=None):
        """
        Forward fill the values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        An upsampled Series.

        See Also
        --------
        Series.fillna: Fill NA/NaN values using the specified method.
        DataFrame.fillna: Fill NA/NaN values using the specified method.
        """
        return self._upsample("ffill", limit=limit)
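
    # Added example (mirrors the ``fillna("pad")`` doctest further down, since
    # ffill is the same fill as 'pad'):
    #   >>> s = pd.Series([1, 2, 3],
    #   ...               index=pd.date_range('20180101', periods=3, freq='h'))
    #   >>> s.resample('30min').ffill()
    #   2018-01-01 00:00:00    1
    #   2018-01-01 00:30:00    1
    #   2018-01-01 01:00:00    2
    #   2018-01-01 01:30:00    2
    #   2018-01-01 02:00:00    3
    #   Freq: 30T, dtype: int64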
    def nearest(self, limit=None):
        """
        Resample by using the nearest value.

        When resampling data, missing values may appear (e.g., when the
        resampling frequency is higher than the original frequency).
        The `nearest` method will replace ``NaN`` values that appeared in
        the resampled data with the value from the nearest member of the
        sequence, based on the index value.
        Missing values that existed in the original data will not be modified.
        If `limit` is given, fill only this many values in each direction for
        each of the original values.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with ``NaN`` values filled with
            their nearest value.

        See Also
        --------
        backfill : Backward fill the new missing values in the resampled data.
        pad : Forward fill ``NaN`` values.

        Examples
        --------
        >>> s = pd.Series([1, 2],
        ...               index=pd.date_range('20180101',
        ...                                   periods=2,
        ...                                   freq='1h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        Freq: H, dtype: int64

        >>> s.resample('15min').nearest()
        2018-01-01 00:00:00    1
        2018-01-01 00:15:00    1
        2018-01-01 00:30:00    2
        2018-01-01 00:45:00    2
        2018-01-01 01:00:00    2
        Freq: 15T, dtype: int64

        Limit the number of upsampled values imputed by the nearest:

        >>> s.resample('15min').nearest(limit=1)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        Freq: 15T, dtype: float64
        """
        return self._upsample("nearest", limit=limit)

    def bfill(self, limit=None):
        """
        Backward fill the new missing values in the resampled data.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency). The backward fill will replace NaN values that appeared in
        the resampled data with the next value in the original sequence.
        Missing values that existed in the original data will not be modified.

        Parameters
        ----------
        limit : int, optional
            Limit of how many values to fill.

        Returns
        -------
        Series, DataFrame
            An upsampled Series or DataFrame with backward filled NaN values.

        See Also
        --------
        bfill : Alias of backfill.
        fillna : Fill NaN values using the specified method, which can be
            'backfill'.
        nearest : Fill NaN values with nearest neighbor starting from center.
        ffill : Forward fill NaN values.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'backfill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'backfill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: H, dtype: int64

        >>> s.resample('30min').bfill()
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30T, dtype: int64

        >>> s.resample('15min').bfill(limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15T, dtype: float64

        Resampling a DataFrame that has missing values:

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').bfill()
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('15min').bfill(limit=2)
                               a    b
        2018-01-01 00:00:00  2.0  1.0
        2018-01-01 00:15:00  NaN  NaN
        2018-01-01 00:30:00  NaN  3.0
        2018-01-01 00:45:00  NaN  3.0
        2018-01-01 01:00:00  NaN  3.0
        2018-01-01 01:15:00  NaN  NaN
        2018-01-01 01:30:00  6.0  5.0
        2018-01-01 01:45:00  6.0  5.0
        2018-01-01 02:00:00  6.0  5.0
        """
        return self._upsample("bfill", limit=limit)

    def fillna(self, method, limit=None):
        """
        Fill missing values introduced by upsampling.

        In statistics, imputation is the process of replacing missing data with
        substituted values [1]_. When resampling data, missing values may
        appear (e.g., when the resampling frequency is higher than the original
        frequency).

        Missing values that existed in the original data will
        not be modified.

        Parameters
        ----------
        method : {'pad', 'backfill', 'ffill', 'bfill', 'nearest'}
            Method to use for filling holes in resampled data

            * 'pad' or 'ffill': use previous valid observation to fill gap
              (forward fill).
            * 'backfill' or 'bfill': use next valid observation to fill gap.
            * 'nearest': use nearest valid observation to fill gap.

        limit : int, optional
            Limit of how many consecutive missing values to fill.

        Returns
        -------
        Series or DataFrame
            An upsampled Series or DataFrame with missing values filled.

        See Also
        --------
        bfill : Backward fill NaN values in the resampled data.
        ffill : Forward fill NaN values in the resampled data.
        nearest : Fill NaN values in the resampled data
            with nearest neighbor starting from center.
        interpolate : Fill NaN values using interpolation.
        Series.fillna : Fill NaN values in the Series using the
            specified method, which can be 'bfill' and 'ffill'.
        DataFrame.fillna : Fill NaN values in the DataFrame using the
            specified method, which can be 'bfill' and 'ffill'.

        References
        ----------
        .. [1] https://en.wikipedia.org/wiki/Imputation_(statistics)

        Examples
        --------
        Resampling a Series:

        >>> s = pd.Series([1, 2, 3],
        ...               index=pd.date_range('20180101', periods=3, freq='h'))
        >>> s
        2018-01-01 00:00:00    1
        2018-01-01 01:00:00    2
        2018-01-01 02:00:00    3
        Freq: H, dtype: int64

        Without filling the missing values you get:

        >>> s.resample("30min").asfreq()
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    2.0
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30T, dtype: float64

        >>> s.resample('30min').fillna("backfill")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30T, dtype: int64

        >>> s.resample('15min').fillna("backfill", limit=2)
        2018-01-01 00:00:00    1.0
        2018-01-01 00:15:00    NaN
        2018-01-01 00:30:00    2.0
        2018-01-01 00:45:00    2.0
        2018-01-01 01:00:00    2.0
        2018-01-01 01:15:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 01:45:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 15T, dtype: float64

        >>> s.resample('30min').fillna("pad")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    1
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    2
        2018-01-01 02:00:00    3
        Freq: 30T, dtype: int64

        >>> s.resample('30min').fillna("nearest")
        2018-01-01 00:00:00    1
        2018-01-01 00:30:00    2
        2018-01-01 01:00:00    2
        2018-01-01 01:30:00    3
        2018-01-01 02:00:00    3
        Freq: 30T, dtype: int64

        Missing values present before the upsampling are not affected.

        >>> sm = pd.Series([1, None, 3],
        ...                index=pd.date_range('20180101', periods=3, freq='h'))
        >>> sm
        2018-01-01 00:00:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: H, dtype: float64

        >>> sm.resample('30min').fillna('backfill')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30T, dtype: float64

        >>> sm.resample('30min').fillna('pad')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    1.0
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    NaN
        2018-01-01 02:00:00    3.0
        Freq: 30T, dtype: float64

        >>> sm.resample('30min').fillna('nearest')
        2018-01-01 00:00:00    1.0
        2018-01-01 00:30:00    NaN
        2018-01-01 01:00:00    NaN
        2018-01-01 01:30:00    3.0
        2018-01-01 02:00:00    3.0
        Freq: 30T, dtype: float64

        DataFrame resampling is done column-wise. All the same options are
        available.

        >>> df = pd.DataFrame({'a': [2, np.nan, 6], 'b': [1, 3, 5]},
        ...                   index=pd.date_range('20180101', periods=3,
        ...                                       freq='h'))
        >>> df
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 01:00:00  NaN  3
        2018-01-01 02:00:00  6.0  5

        >>> df.resample('30min').fillna("bfill")
                               a  b
        2018-01-01 00:00:00  2.0  1
        2018-01-01 00:30:00  NaN  3
        2018-01-01 01:00:00  NaN  3
        2018-01-01 01:30:00  6.0  5
        2018-01-01 02:00:00  6.0  5
        """
        return self._upsample(method, limit=limit)

    @doc(NDFrame.interpolate, **_shared_docs_kwargs)
    def interpolate(
        self,
        method: QuantileInterpolation = "linear",
        *,
        axis: Axis = 0,
        limit=None,
        inplace: bool = False,
        limit_direction: Literal["forward", "backward", "both"] = "forward",
        limit_area=None,
        downcast=None,
        **kwargs,
    ):
        """
        Interpolate values according to different methods.
        """
        result = self._upsample("asfreq")
        return result.interpolate(
            method=method,
            axis=axis,
            limit=limit,
            inplace=inplace,
            limit_direction=limit_direction,
            limit_area=limit_area,
            downcast=downcast,
            **kwargs,
        )

    def asfreq(self, fill_value=None):
        """
        Return the values at the new freq, essentially a reindex.

        Parameters
        ----------
        fill_value : scalar, optional
            Value to use for missing values, applied during upsampling (note
            this does not fill NaNs that already were present).

        Returns
        -------
        DataFrame or Series
            Values at the specified freq.

        See Also
        --------
        Series.asfreq: Convert TimeSeries to specified frequency.
        DataFrame.asfreq: Convert TimeSeries to specified frequency.
        """
        return self._upsample("asfreq", fill_value=fill_value)
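
    # Added example (same data and output as the ``fillna`` doctest above):
    #   >>> s.resample('30min').asfreq()
    #   2018-01-01 00:00:00    1.0
    #   2018-01-01 00:30:00    NaN
    #   2018-01-01 01:00:00    2.0
    #   2018-01-01 01:30:00    NaN
    #   2018-01-01 02:00:00    3.0
    #   Freq: 30T, dtype: float64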
    def sum(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "sum", args, kwargs)
        nv.validate_resampler_func("sum", args, kwargs)
        return self._downsample("sum", numeric_only=numeric_only, min_count=min_count)

    @doc(GroupBy.prod)
    def prod(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "prod", args, kwargs)
        nv.validate_resampler_func("prod", args, kwargs)
        return self._downsample("prod", numeric_only=numeric_only, min_count=min_count)

    def min(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "min", args, kwargs)
        nv.validate_resampler_func("min", args, kwargs)
        return self._downsample("min", numeric_only=numeric_only, min_count=min_count)

    def max(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "max", args, kwargs)
        nv.validate_resampler_func("max", args, kwargs)
        return self._downsample("max", numeric_only=numeric_only, min_count=min_count)

    @doc(GroupBy.first)
    def first(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "first", args, kwargs)
        nv.validate_resampler_func("first", args, kwargs)
        return self._downsample("first", numeric_only=numeric_only, min_count=min_count)

    @doc(GroupBy.last)
    def last(
        self,
        numeric_only: bool = False,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "last", args, kwargs)
        nv.validate_resampler_func("last", args, kwargs)
        return self._downsample("last", numeric_only=numeric_only, min_count=min_count)

    @doc(GroupBy.median)
    def median(self, numeric_only: bool = False, *args, **kwargs):
        maybe_warn_args_and_kwargs(type(self), "median", args, kwargs)
        nv.validate_resampler_func("median", args, kwargs)
        return self._downsample("median", numeric_only=numeric_only)

    def mean(
        self,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute mean of groups, excluding missing values.

        Parameters
        ----------
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Mean of values within each group.
        """
        maybe_warn_args_and_kwargs(type(self), "mean", args, kwargs)
        nv.validate_resampler_func("mean", args, kwargs)
        return self._downsample("mean", numeric_only=numeric_only)

    def std(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute standard deviation of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Standard deviation of values within each group.
        """
        maybe_warn_args_and_kwargs(type(self), "std", args, kwargs)
        nv.validate_resampler_func("std", args, kwargs)
        return self._downsample("std", ddof=ddof, numeric_only=numeric_only)

    def var(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        """
        Compute variance of groups, excluding missing values.

        Parameters
        ----------
        ddof : int, default 1
            Degrees of freedom.
        numeric_only : bool, default False
            Include only `float`, `int` or `boolean` data.

            .. versionadded:: 1.5.0

            .. versionchanged:: 2.0.0

                numeric_only now defaults to ``False``.

        Returns
        -------
        DataFrame or Series
            Variance of values within each group.
        """
        maybe_warn_args_and_kwargs(type(self), "var", args, kwargs)
        nv.validate_resampler_func("var", args, kwargs)
        return self._downsample("var", ddof=ddof, numeric_only=numeric_only)

    @doc(GroupBy.sem)
    def sem(
        self,
        ddof: int = 1,
        numeric_only: bool = False,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "sem", args, kwargs)
        nv.validate_resampler_func("sem", args, kwargs)
        return self._downsample("sem", ddof=ddof, numeric_only=numeric_only)

    @doc(GroupBy.ohlc)
    def ohlc(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "ohlc", args, kwargs)
        nv.validate_resampler_func("ohlc", args, kwargs)
        return self._downsample("ohlc")

    @doc(SeriesGroupBy.nunique)
    def nunique(
        self,
        *args,
        **kwargs,
    ):
        maybe_warn_args_and_kwargs(type(self), "nunique", args, kwargs)
        nv.validate_resampler_func("nunique", args, kwargs)
        return self._downsample("nunique")

    @doc(GroupBy.size)
    def size(self):
        result = self._downsample("size")

        # If the result is a non-empty DataFrame we stack to get a Series
        # GH 46826
        if isinstance(result, ABCDataFrame) and not result.empty:
            result = result.stack()

        if not len(self.ax):
            from pandas import Series

            if self._selected_obj.ndim == 1:
                name = self._selected_obj.name
            else:
                name = None
            result = Series([], index=result.index, dtype="int64", name=name)
        return result

    @doc(GroupBy.count)
    def count(self):
        result = self._downsample("count")
        if not len(self.ax):
            if self._selected_obj.ndim == 1:
                result = type(self._selected_obj)(
                    [], index=result.index, dtype="int64", name=self._selected_obj.name
                )
            else:
                from pandas import DataFrame

                result = DataFrame(
                    [], index=result.index, columns=result.columns, dtype="int64"
                )

        return result

    def quantile(self, q: float | AnyArrayLike = 0.5, **kwargs):
        """
        Return value at the given quantile.

        Parameters
        ----------
        q : float or array-like, default 0.5 (50% quantile)

        Returns
        -------
        DataFrame or Series
            Quantile of values within each group.

        See Also
        --------
        Series.quantile
            Return a series, where the index is q and the values are the quantiles.
        DataFrame.quantile
            Return a DataFrame, where the columns are the columns of self,
            and the values are the quantiles.
        DataFrameGroupBy.quantile
            Return a DataFrame, where the columns are groupby columns,
            and the values are its quantiles.
        """
        return self._downsample("quantile", q=q, **kwargs)
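
    # Added example (data from the ``agg`` doctest above; medians of the
    # 2-second bins [1, 2], [3, 4] and [5]):
    #   >>> r = s.resample('2s')
    #   >>> r.quantile(0.5)
    #   2013-01-01 00:00:00    1.5
    #   2013-01-01 00:00:02    3.5
    #   2013-01-01 00:00:04    5.0
    #   Freq: 2S, dtype: float64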
class _GroupByMixin(PandasObject):
    """
    Provide the groupby facilities.
    """

    _attributes: list[str]  # in practice the same as Resampler._attributes
    _selection: IndexLabel | None = None
    _groupby: GroupBy
    _timegrouper: TimeGrouper

    def __init__(
        self,
        *,
        parent: Resampler,
        groupby: GroupBy,
        key=None,
        selection: IndexLabel | None = None,
    ) -> None:
        # reached via ._gotitem and _get_resampler_for_grouping

        assert isinstance(groupby, GroupBy), type(groupby)

        # parent is always a Resampler, sometimes a _GroupByMixin
        assert isinstance(parent, Resampler), type(parent)

        # initialize our GroupByMixin object with
        # the resampler attributes
        for attr in self._attributes:
            setattr(self, attr, getattr(parent, attr))
        self._selection = selection

        self.binner = parent.binner
        self.key = key

        self._groupby = groupby
        self._timegrouper = copy.copy(parent._timegrouper)

        self.ax = parent.ax
        self.obj = parent.obj

    @no_type_check
    def _apply(self, f, *args, **kwargs):
        """
        Dispatch to _upsample; we are stripping all of the _upsample kwargs and
        performing the original function call on the grouped object.
        """

        def func(x):
            x = self._resampler_cls(x, timegrouper=self._timegrouper, gpr_index=self.ax)

            if isinstance(f, str):
                return getattr(x, f)(**kwargs)

            return x.apply(f, *args, **kwargs)

        result = self._groupby.apply(func)
        return self._wrap_result(result)

    _upsample = _apply
    _downsample = _apply
    _groupby_and_aggregate = _apply
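
    # Added note: every resample operation on a grouped object funnels through
    # ``_apply`` above -- for each group, a fresh resampler is built over that
    # group's slice and the requested method is called on it, so
    # ``df.groupby('key').resample('D').sum()`` is effectively a per-group
    # ``resample('D').sum()``.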
    @final
    def _gotitem(self, key, ndim, subset=None):
        """
        Sub-classes to define. Return a sliced object.

        Parameters
        ----------
        key : string / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        # create a new object to prevent aliasing
        if subset is None:
            subset = self.obj
            if key is not None:
                subset = subset[key]
            else:
                # reached via Apply.agg_dict_like with selection=None, ndim=1
                assert subset.ndim == 1

        # Try to select from a DataFrame, falling back to a Series
        try:
            if isinstance(key, list) and self.key not in key and self.key is not None:
                key.append(self.key)
            groupby = self._groupby[key]
        except IndexError:
            groupby = self._groupby

        selection = None
        if subset.ndim == 2 and (
            (lib.is_scalar(key) and key in subset) or lib.is_list_like(key)
        ):
            selection = key
        elif subset.ndim == 1 and lib.is_scalar(key) and key == subset.name:
            selection = key

        new_rs = type(self)(
            groupby=groupby,
            parent=cast(Resampler, self),
            selection=selection,
        )
        return new_rs


class DatetimeIndexResampler(Resampler):
    @property
    def _resampler_for_grouping(self):
        return DatetimeIndexResamplerGroupby

    def _get_binner_for_time(self):
        # this is how we are actually creating the bins
        if self.kind == "period":
            return self._timegrouper._get_time_period_bins(self.ax)
        return self._timegrouper._get_time_bins(self.ax)

    def _downsample(self, how, **kwargs):
        """
        Downsample the cython defined function.

        Parameters
        ----------
        how : string / cython mapped function
        **kwargs : kw args passed to how function
        """
        how = com.get_cython_func(how) or how
        ax = self.ax

        if self._selected_obj.ndim == 1:
            obj = self._selected_obj
        else:
            # Excludes `on` column when provided
            obj = self._obj_with_exclusions

        if not len(ax):
            # reset to the new freq
            obj = obj.copy()
            obj.index = obj.index._with_freq(self.freq)
            assert obj.index.freq == self.freq, (obj.index.freq, self.freq)
            return obj

        # do we have a regular frequency

        # error: Item "None" of "Optional[Any]" has no attribute "binlabels"
        if (
            (ax.freq is not None or ax.inferred_freq is not None)
            and len(self.grouper.binlabels) > len(ax)
            and how is None
        ):
            # let's do an asfreq
            return self.asfreq()

        # we are downsampling
        # we want to call the actual grouper method here
        result = obj.groupby(self.grouper, axis=self.axis).aggregate(how, **kwargs)
        return self._wrap_result(result)
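
    # Added note: when the axis has a regular frequency, the target grid has
    # *more* labels than the axis has points, and no aggregation function was
    # passed, the branch above treats the call as a pure frequency conversion
    # (``asfreq``) rather than a groupby-aggregation -- roughly, each bin then
    # holds at most one observation.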
    def _adjust_binner_for_upsample(self, binner):
        """
        Adjust our binner when upsampling.

        The range of a new index should not be outside specified range
        """
        if self.closed == "right":
            binner = binner[1:]
        else:
            binner = binner[:-1]
        return binner

    def _upsample(self, method, limit=None, fill_value=None):
        """
        Parameters
        ----------
        method : string {'backfill', 'bfill', 'pad',
            'ffill', 'asfreq'} method for upsampling
        limit : int, default None
            Maximum size gap to fill when reindexing
        fill_value : scalar, default None
            Value to use for missing values

        See Also
        --------
        .fillna: Fill NA/NaN values using the specified method.
        """
        if self.axis:
            raise AssertionError("axis must be 0")
        if self._from_selection:
            raise ValueError(
                "Upsampling from level= or on= selection "
                "is not supported, use .set_index(...) "
                "to explicitly set index to datetime-like"
            )

        ax = self.ax
        obj = self._selected_obj
        binner = self.binner
        res_index = self._adjust_binner_for_upsample(binner)

        # if we have the same frequency as our axis, then we are equal sampling
        if (
            limit is None
            and to_offset(ax.inferred_freq) == self.freq
            and len(obj) == len(res_index)
        ):
            result = obj.copy()
            result.index = res_index
        else:
            result = obj.reindex(
                res_index, method=method, limit=limit, fill_value=fill_value
            )

        return self._wrap_result(result)

    def _wrap_result(self, result):
        result = super()._wrap_result(result)

        # we may have a different kind that we were asked originally
        # convert if needed
        if self.kind == "period" and not isinstance(result.index, PeriodIndex):
            result.index = result.index.to_period(self.freq)
        return result


class DatetimeIndexResamplerGroupby(_GroupByMixin, DatetimeIndexResampler):
    """
    Provides a resample of a groupby implementation
    """

    @property
    def _resampler_cls(self):
        return DatetimeIndexResampler


class PeriodIndexResampler(DatetimeIndexResampler):
    @property
    def _resampler_for_grouping(self):
        return PeriodIndexResamplerGroupby

    def _get_binner_for_time(self):
        if self.kind == "timestamp":
            return super()._get_binner_for_time()
        return self._timegrouper._get_period_bins(self.ax)

    def _convert_obj(self, obj: NDFrameT) -> NDFrameT:
        obj = super()._convert_obj(obj)

        if self._from_selection:
            # see GH 14008, GH 12871
            msg = (
                "Resampling from level= or on= selection "
                "with a PeriodIndex is not currently supported, "
                "use .set_index(...) to explicitly set index"
            )
            raise NotImplementedError(msg)

        # convert to timestamp
        if self.kind == "timestamp":
            obj = obj.to_timestamp(how=self.convention)

        return obj

    def _downsample(self, how, **kwargs):
        """
        Downsample the cython defined function.

        Parameters
        ----------
        how : string / cython mapped function
        **kwargs : kw args passed to how function
        """
        # we may need to actually resample as if we are timestamps
        if self.kind == "timestamp":
            return super()._downsample(how, **kwargs)

        how = com.get_cython_func(how) or how
        ax = self.ax

        if is_subperiod(ax.freq, self.freq):
            # Downsampling
            return self._groupby_and_aggregate(how, **kwargs)
        elif is_superperiod(ax.freq, self.freq):
            if how == "ohlc":
                # GH #13083
                # upsampling to subperiods is handled as an asfreq, which works
                # for pure aggregating/reducing methods
                # OHLC reduces along the time dimension, but creates multiple
                # values for each period -> handle by _groupby_and_aggregate()
                return self._groupby_and_aggregate(how)
            return self.asfreq()
        elif ax.freq == self.freq:
            return self.asfreq()

        raise IncompatibleFrequency(
            f"Frequency {ax.freq} cannot be resampled to {self.freq}, "
            "as they are not sub or super periods"
        )
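
    # Added note: ``is_subperiod(ax.freq, self.freq)`` means each existing
    # period nests inside a target period (e.g. monthly data resampled to
    # 'Q'), so values must be aggregated; ``is_superperiod`` is the reverse
    # (e.g. quarterly data to 'M'), which is a pure frequency conversion
    # unless the function is 'ohlc'.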
    def _upsample(self, method, limit=None, fill_value=None):
        """
        Parameters
        ----------
        method : {'backfill', 'bfill', 'pad', 'ffill'}
            Method for upsampling.
        limit : int, default None
            Maximum size gap to fill when reindexing.
        fill_value : scalar, default None
            Value to use for missing values.

        See Also
        --------
        .fillna: Fill NA/NaN values using the specified method.
        """
        # we may need to actually resample as if we are timestamps
        if self.kind == "timestamp":
            return super()._upsample(method, limit=limit, fill_value=fill_value)

        ax = self.ax
        obj = self.obj
        new_index = self.binner

        # Start vs. end of period
        memb = ax.asfreq(self.freq, how=self.convention)

        # Get the fill indexer
        indexer = memb.get_indexer(new_index, method=method, limit=limit)
        new_obj = _take_new_index(
            obj,
            indexer,
            new_index,
            axis=self.axis,
        )
        return self._wrap_result(new_obj)


class PeriodIndexResamplerGroupby(_GroupByMixin, PeriodIndexResampler):
    """
    Provides a resample of a groupby implementation.
    """

    @property
    def _resampler_cls(self):
        return PeriodIndexResampler


class TimedeltaIndexResampler(DatetimeIndexResampler):
    @property
    def _resampler_for_grouping(self):
        return TimedeltaIndexResamplerGroupby

    def _get_binner_for_time(self):
        return self._timegrouper._get_time_delta_bins(self.ax)

    def _adjust_binner_for_upsample(self, binner):
        """
        Adjust our binner when upsampling.

        The range of a new index is allowed to be greater than original range
        so we don't need to change the length of a binner, GH 13022
        """
        return binner


class TimedeltaIndexResamplerGroupby(_GroupByMixin, TimedeltaIndexResampler):
    """
    Provides a resample of a groupby implementation.
    """

    @property
    def _resampler_cls(self):
        return TimedeltaIndexResampler


def get_resampler(obj: Series | DataFrame, kind=None, **kwds) -> Resampler:
    """
    Create a TimeGrouper and return our resampler.
    """
    tg = TimeGrouper(**kwds)
    return tg._get_resampler(obj, kind=kind)


get_resampler.__doc__ = Resampler.__doc__


def get_resampler_for_grouping(
    groupby: GroupBy,
    rule,
    how=None,
    fill_method=None,
    limit=None,
    kind=None,
    on=None,
    **kwargs,
) -> Resampler:
    """
    Return our appropriate resampler when grouping as well.
    """
    # .resample uses 'on' similar to how .groupby uses 'key'
    tg = TimeGrouper(freq=rule, key=on, **kwargs)
    resampler = tg._get_resampler(groupby.obj, kind=kind)
    return resampler._get_resampler_for_grouping(groupby=groupby, key=tg.key)
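
# Added note: ``get_resampler`` backs ``obj.resample(...)`` while
# ``get_resampler_for_grouping`` backs ``obj.groupby(...).resample(...)``;
# both build a TimeGrouper from the user's arguments and let it pick the
# concrete Resampler subclass for the index type.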
  1308. class TimeGrouper(Grouper):
  1309. """
  1310. Custom groupby class for time-interval grouping.
  1311. Parameters
  1312. ----------
  1313. freq : pandas date offset or offset alias for identifying bin edges
  1314. closed : closed end of interval; 'left' or 'right'
  1315. label : interval boundary to use for labeling; 'left' or 'right'
  1316. convention : {'start', 'end', 'e', 's'}
  1317. If axis is PeriodIndex
  1318. """
  1319. _attributes = Grouper._attributes + (
  1320. "closed",
  1321. "label",
  1322. "how",
  1323. "kind",
  1324. "convention",
  1325. "origin",
  1326. "offset",
  1327. )
  1328. origin: TimeGrouperOrigin
    def __init__(
        self,
        freq: Frequency = "Min",
        closed: Literal["left", "right"] | None = None,
        label: Literal["left", "right"] | None = None,
        how: str = "mean",
        axis: Axis = 0,
        fill_method=None,
        limit=None,
        kind: str | None = None,
        convention: Literal["start", "end", "e", "s"] | None = None,
        origin: Literal["epoch", "start", "start_day", "end", "end_day"]
        | TimestampConvertibleTypes = "start_day",
        offset: TimedeltaConvertibleTypes | None = None,
        group_keys: bool = False,
        **kwargs,
    ) -> None:
        # Check for correctness of the keyword arguments which would
        # otherwise silently use the default if misspelled
        if label not in {None, "left", "right"}:
            raise ValueError(f"Unsupported value {label} for `label`")
        if closed not in {None, "left", "right"}:
            raise ValueError(f"Unsupported value {closed} for `closed`")
        if convention not in {None, "start", "end", "e", "s"}:
            raise ValueError(f"Unsupported value {convention} for `convention`")

        freq = to_offset(freq)

        end_types = {"M", "A", "Q", "BM", "BA", "BQ", "W"}
        rule = freq.rule_code
        if rule in end_types or ("-" in rule and rule[: rule.find("-")] in end_types):
            if closed is None:
                closed = "right"
            if label is None:
                label = "right"
        else:
            # The backward resample sets ``closed`` to ``'right'`` by default
            # since the last value should be considered as the edge point for
            # the last bin. When origin is "end" or "end_day", the value for a
            # specific ``Timestamp`` index stands for the resample result from
            # the current ``Timestamp`` minus ``freq`` to the current
            # ``Timestamp`` with a right close.
            if origin in ["end", "end_day"]:
                if closed is None:
                    closed = "right"
                if label is None:
                    label = "right"
            else:
                if closed is None:
                    closed = "left"
                if label is None:
                    label = "left"

        self.closed = closed
        self.label = label
        self.kind = kind
        self.convention = convention if convention is not None else "e"
        self.how = how
        self.fill_method = fill_method
        self.limit = limit
        self.group_keys = group_keys

        if origin in ("epoch", "start", "start_day", "end", "end_day"):
            # error: Incompatible types in assignment (expression has type "Union[Union[
            # Timestamp, datetime, datetime64, signedinteger[_64Bit], float, str],
            # Literal['epoch', 'start', 'start_day', 'end', 'end_day']]", variable has
            # type "Union[Timestamp, Literal['epoch', 'start', 'start_day', 'end',
            # 'end_day']]")
            self.origin = origin  # type: ignore[assignment]
        else:
            try:
                self.origin = Timestamp(origin)
            except (ValueError, TypeError) as err:
                raise ValueError(
                    "'origin' should be equal to 'epoch', 'start', 'start_day', "
                    "'end', 'end_day' or "
                    f"should be a Timestamp convertible type. Got '{origin}' instead."
                ) from err

        try:
            self.offset = Timedelta(offset) if offset is not None else None
        except (ValueError, TypeError) as err:
            raise ValueError(
                "'offset' should be a Timedelta convertible type. "
                f"Got '{offset}' instead."
            ) from err

        # always sort time groupers
        kwargs["sort"] = True

        super().__init__(freq=freq, axis=axis, **kwargs)

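    # Illustrative sketch (not part of the pandas source): how the
    # closed/label defaults chosen above surface through the public API.
    # Outputs are indicative and may vary slightly across pandas versions.
    #
    #   import pandas as pd
    #   s = pd.Series(range(4), index=pd.date_range("2000-01-01", periods=4, freq="D"))
    #   s.resample("M").sum()   # "M" is end-anchored -> closed/label default to
    #                           # "right"; one bin labeled 2000-01-31
    #   s.resample("2D").sum()  # not end-anchored -> closed/label default to
    #                           # "left"; bins labeled 2000-01-01 and 2000-01-03
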
    def _get_resampler(self, obj: NDFrame, kind=None) -> Resampler:
        """
        Return my resampler or raise if we have an invalid axis.

        Parameters
        ----------
        obj : Series or DataFrame
        kind : string, optional
            'period', 'timestamp', 'timedelta' are valid

        Returns
        -------
        Resampler

        Raises
        ------
        TypeError if incompatible axis
        """
        _, ax, indexer = self._set_grouper(obj, gpr_index=None)
        if isinstance(ax, DatetimeIndex):
            return DatetimeIndexResampler(
                obj,
                timegrouper=self,
                kind=kind,
                axis=self.axis,
                group_keys=self.group_keys,
                gpr_index=ax,
            )
        elif isinstance(ax, PeriodIndex) or kind == "period":
            return PeriodIndexResampler(
                obj,
                timegrouper=self,
                kind=kind,
                axis=self.axis,
                group_keys=self.group_keys,
                gpr_index=ax,
            )
        elif isinstance(ax, TimedeltaIndex):
            return TimedeltaIndexResampler(
                obj,
                timegrouper=self,
                axis=self.axis,
                group_keys=self.group_keys,
                gpr_index=ax,
            )

        raise TypeError(
            "Only valid with DatetimeIndex, "
            "TimedeltaIndex or PeriodIndex, "
            f"but got an instance of '{type(ax).__name__}'"
        )

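    # Illustrative sketch (not part of the pandas source): the index-type
    # dispatch above, observed through the public API.
    #
    #   import pandas as pd
    #   dt = pd.Series(1, index=pd.date_range("2000", periods=3, freq="D"))
    #   pi = pd.Series(1, index=pd.period_range("2000", periods=3, freq="D"))
    #   td = pd.Series(1, index=pd.timedelta_range("0D", periods=3, freq="D"))
    #   type(dt.resample("2D")).__name__  # 'DatetimeIndexResampler'
    #   type(pi.resample("2D")).__name__  # 'PeriodIndexResampler'
    #   type(td.resample("2D")).__name__  # 'TimedeltaIndexResampler'
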
    def _get_grouper(
        self, obj: NDFrameT, validate: bool = True
    ) -> tuple[BinGrouper, NDFrameT]:
        # create the resampler and return our binner
        r = self._get_resampler(obj)
        return r.grouper, cast(NDFrameT, r.obj)

    def _get_time_bins(self, ax: DatetimeIndex):
        if not isinstance(ax, DatetimeIndex):
            raise TypeError(
                "axis must be a DatetimeIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        if len(ax) == 0:
            binner = labels = DatetimeIndex(data=[], freq=self.freq, name=ax.name)
            return binner, [], labels

        first, last = _get_timestamp_range_edges(
            ax.min(),
            ax.max(),
            self.freq,
            unit=ax.unit,
            closed=self.closed,
            origin=self.origin,
            offset=self.offset,
        )
        # GH #12037
        # use first/last directly instead of calling replace() on them,
        # because replace() swallows the nanosecond part; the last bin could
        # then end slightly before the true end and trigger a
        # `Values falls after last bin` error.
        # GH 25758: If DST lands at midnight (e.g. 'America/Havana'), user feedback
        # has noted that ambiguous=True provides the most sensible result
        binner = labels = date_range(
            freq=self.freq,
            start=first,
            end=last,
            tz=ax.tz,
            name=ax.name,
            ambiguous=True,
            nonexistent="shift_forward",
            unit=ax.unit,
        )

        ax_values = ax.asi8
        binner, bin_edges = self._adjust_bin_edges(binner, ax_values)

        # general version, knowing nothing about relative frequencies
        bins = lib.generate_bins_dt64(
            ax_values, bin_edges, self.closed, hasnans=ax.hasnans
        )

        if self.closed == "right":
            labels = binner
            if self.label == "right":
                labels = labels[1:]
        elif self.label == "right":
            labels = labels[1:]

        if ax.hasnans:
            binner = binner.insert(0, NaT)
            labels = labels.insert(0, NaT)

        # if we end up with more labels than bins
        # adjust the labels
        # GH4076
        if len(bins) < len(labels):
            labels = labels[: len(bins)]

        return binner, bins, labels

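    # Illustrative sketch (not part of the pandas source; relies on private
    # internals, so treat it as indicative only): what _get_time_bins returns
    # for a small hourly index binned by 2 hours.
    #
    #   from pandas import date_range
    #   from pandas.core.resample import TimeGrouper
    #   ax = date_range("2000-01-01", periods=5, freq="h")
    #   binner, bins, labels = TimeGrouper(freq="2h")._get_time_bins(ax)
    #   # binner: edges 00:00, 02:00, 04:00, 06:00
    #   # bins:   array([2, 4, 5]) -> cumulative split positions into ax
    #   # labels: 00:00, 02:00, 04:00 (left-closed, left-labeled by default)
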
    def _adjust_bin_edges(
        self, binner: DatetimeIndex, ax_values: npt.NDArray[np.int64]
    ) -> tuple[DatetimeIndex, npt.NDArray[np.int64]]:
        # Some hacks for > daily data, see #1471, #1458, #1483

        if self.freq != "D" and is_superperiod(self.freq, "D"):
            if self.closed == "right":
                # GH 21459, GH 9119: Adjust the bins relative to the wall time
                edges_dti = binner.tz_localize(None)
                edges_dti = (
                    edges_dti
                    + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit)
                    - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit)
                )
                bin_edges = edges_dti.tz_localize(binner.tz).asi8
            else:
                bin_edges = binner.asi8

            # intraday values on last day
            if bin_edges[-2] > ax_values.max():
                bin_edges = bin_edges[:-1]
                binner = binner[:-1]
        else:
            bin_edges = binner.asi8

        return binner, bin_edges

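    # Illustrative sketch (not part of the pandas source): the wall-time
    # adjustment above is what lets intraday values on the last day of a
    # right-closed > daily bin land inside that bin.
    #
    #   import pandas as pd
    #   s = pd.Series(1, index=pd.to_datetime(["2000-01-31 10:00", "2000-01-31 20:00"]))
    #   s.resample("M").sum()
    #   # 2000-01-31    2
    #   # Without shifting the edge to the end of the day, both intraday values
    #   # would fall past the 2000-01-31 00:00 edge into the February bin.
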
    def _get_time_delta_bins(self, ax: TimedeltaIndex):
        if not isinstance(ax, TimedeltaIndex):
            raise TypeError(
                "axis must be a TimedeltaIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        if not len(ax):
            binner = labels = TimedeltaIndex(data=[], freq=self.freq, name=ax.name)
            return binner, [], labels

        start, end = ax.min(), ax.max()

        if self.closed == "right":
            end += self.freq

        labels = binner = timedelta_range(
            start=start, end=end, freq=self.freq, name=ax.name
        )

        end_stamps = labels
        if self.closed == "left":
            end_stamps += self.freq

        bins = ax.searchsorted(end_stamps, side=self.closed)

        if self.offset:
            # GH 10530 & 31809
            labels += self.offset

        return binner, bins, labels

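    # Illustrative sketch (not part of the pandas source): timedelta bins via
    # the public API; the default here is left-closed, left-labeled.
    #
    #   import pandas as pd
    #   s = pd.Series(range(5), index=pd.timedelta_range("0 min", periods=5, freq="min"))
    #   s.resample("2min").sum()
    #   # 0 days 00:00:00    1
    #   # 0 days 00:02:00    5
    #   # 0 days 00:04:00    4
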
    def _get_time_period_bins(self, ax: DatetimeIndex):
        if not isinstance(ax, DatetimeIndex):
            raise TypeError(
                "axis must be a DatetimeIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        freq = self.freq

        if not len(ax):
            binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name)
            return binner, [], labels

        labels = binner = period_range(start=ax[0], end=ax[-1], freq=freq, name=ax.name)

        end_stamps = (labels + freq).asfreq(freq, "s").to_timestamp()
        if ax.tz:
            end_stamps = end_stamps.tz_localize(ax.tz)
        bins = ax.searchsorted(end_stamps, side="left")

        return binner, bins, labels

    def _get_period_bins(self, ax: PeriodIndex):
        if not isinstance(ax, PeriodIndex):
            raise TypeError(
                "axis must be a PeriodIndex, but got "
                f"an instance of {type(ax).__name__}"
            )

        memb = ax.asfreq(self.freq, how=self.convention)

        # NaT handling as in pandas._lib.lib.generate_bins_dt64()
        nat_count = 0
        if memb.hasnans:
            # error: Incompatible types in assignment (expression has type
            # "bool_", variable has type "int")  [assignment]
            nat_count = np.sum(memb._isnan)  # type: ignore[assignment]
            memb = memb[~memb._isnan]

        if not len(memb):
            # index contains no valid (non-NaT) values
            bins = np.array([], dtype=np.int64)
            binner = labels = PeriodIndex(data=[], freq=self.freq, name=ax.name)
            if len(ax) > 0:
                # index is all NaT
                binner, bins, labels = _insert_nat_bin(binner, bins, labels, len(ax))
            return binner, bins, labels

        freq_mult = self.freq.n

        start = ax.min().asfreq(self.freq, how=self.convention)
        end = ax.max().asfreq(self.freq, how="end")
        bin_shift = 0

        if isinstance(self.freq, Tick):
            # GH 23882 & 31809: get adjusted bin edge labels with 'origin'
            # and 'offset' support. This call only makes sense if the freq is a
            # Tick since offset and origin are only used in those cases.
            # Not doing this check could create an extra empty bin.
            p_start, end = _get_period_range_edges(
                start,
                end,
                self.freq,
                closed=self.closed,
                origin=self.origin,
                offset=self.offset,
            )

            # Get offset for bin edge (not label edge) adjustment
            start_offset = Period(start, self.freq) - Period(p_start, self.freq)
            # error: Item "Period" of "Union[Period, Any]" has no attribute "n"
            bin_shift = start_offset.n % freq_mult  # type: ignore[union-attr]
            start = p_start

        labels = binner = period_range(
            start=start, end=end, freq=self.freq, name=ax.name
        )

        i8 = memb.asi8

        # when upsampling to subperiods, we need to generate enough bins
        expected_bins_count = len(binner) * freq_mult
        i8_extend = expected_bins_count - (i8[-1] - i8[0])
        rng = np.arange(i8[0], i8[-1] + i8_extend, freq_mult)
        rng += freq_mult
        # adjust bin edge indexes to account for base
        rng -= bin_shift

        # Wrap in PeriodArray for PeriodArray.searchsorted
        prng = type(memb._data)(rng, dtype=memb.dtype)
        bins = memb.searchsorted(prng, side="left")

        if nat_count > 0:
            binner, bins, labels = _insert_nat_bin(binner, bins, labels, nat_count)

        return binner, bins, labels

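    # Illustrative sketch (not part of the pandas source): period bins via the
    # public API; three monthly periods collapse into one quarterly bin.
    #
    #   import pandas as pd
    #   s = pd.Series(range(3), index=pd.period_range("2000-01", periods=3, freq="M"))
    #   s.resample("Q").sum()
    #   # 2000Q1    3
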
def _take_new_index(
    obj: NDFrameT, indexer: npt.NDArray[np.intp], new_index: Index, axis: AxisInt = 0
) -> NDFrameT:
    if isinstance(obj, ABCSeries):
        new_values = algos.take_nd(obj._values, indexer)
        # error: Incompatible return value type (got "Series", expected "NDFrameT")
        return obj._constructor(  # type: ignore[return-value]
            new_values, index=new_index, name=obj.name
        )
    elif isinstance(obj, ABCDataFrame):
        if axis == 1:
            raise NotImplementedError("axis 1 is not supported")
        new_mgr = obj._mgr.reindex_indexer(new_axis=new_index, indexer=indexer, axis=1)
        # error: Incompatible return value type
        # (got "DataFrame", expected "NDFrameT")
        return obj._constructor(new_mgr)  # type: ignore[return-value]
    else:
        raise ValueError("'obj' should be either a Series or a DataFrame")

def _get_timestamp_range_edges(
    first: Timestamp,
    last: Timestamp,
    freq: BaseOffset,
    unit: str,
    closed: Literal["right", "left"] = "left",
    origin: TimeGrouperOrigin = "start_day",
    offset: Timedelta | None = None,
) -> tuple[Timestamp, Timestamp]:
    """
    Adjust the `first` Timestamp to the preceding Timestamp that resides on
    the provided offset. Adjust the `last` Timestamp to the following
    Timestamp that resides on the provided offset. Input Timestamps that
    already reside on the offset will be adjusted depending on the type of
    offset and the `closed` parameter.

    Parameters
    ----------
    first : pd.Timestamp
        The beginning Timestamp of the range to be adjusted.
    last : pd.Timestamp
        The ending Timestamp of the range to be adjusted.
    freq : pd.DateOffset
        The dateoffset to which the Timestamps will be adjusted.
    closed : {'right', 'left'}, default "left"
        Which side of bin interval is closed.
    origin : {'epoch', 'start', 'start_day'} or Timestamp, default 'start_day'
        The timestamp on which to adjust the grouping. The timezone of origin must
        match the timezone of the index.
        If a timestamp is not used, these values are also supported:

        - 'epoch': `origin` is 1970-01-01
        - 'start': `origin` is the first value of the timeseries
        - 'start_day': `origin` is the first day at midnight of the timeseries
    offset : pd.Timedelta, default is None
        An offset timedelta added to the origin.

    Returns
    -------
    A tuple of length 2, containing the adjusted pd.Timestamp objects.
    """
    if isinstance(freq, Tick):
        index_tz = first.tz
        if isinstance(origin, Timestamp) and (origin.tz is None) != (index_tz is None):
            raise ValueError("The origin must have the same timezone as the index.")
        if origin == "epoch":
            # set the epoch based on the timezone to have similar bins results when
            # resampling on the same kind of indexes on different timezones
            origin = Timestamp("1970-01-01", tz=index_tz)

        if isinstance(freq, Day):
            # _adjust_dates_anchored assumes 'D' means 24H, but first/last
            # might contain a DST transition (23H, 24H, or 25H).
            # So "pretend" the dates are naive when adjusting the endpoints
            first = first.tz_localize(None)
            last = last.tz_localize(None)
            if isinstance(origin, Timestamp):
                origin = origin.tz_localize(None)

        first, last = _adjust_dates_anchored(
            first, last, freq, closed=closed, origin=origin, offset=offset, unit=unit
        )
        if isinstance(freq, Day):
            first = first.tz_localize(index_tz)
            last = last.tz_localize(index_tz)
    else:
        first = first.normalize()
        last = last.normalize()

        if closed == "left":
            first = Timestamp(freq.rollback(first))
        else:
            first = Timestamp(first - freq)

        last = Timestamp(last + freq)

    return first, last

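# Illustrative sketch (not part of the pandas source; exercises a private
# helper, so treat it as indicative only): raw min/max timestamps snapped
# onto an hourly grid with the default left-closed, 'start_day' origin.
#
#   from pandas import Timestamp
#   from pandas.core.resample import _get_timestamp_range_edges
#   from pandas.tseries.frequencies import to_offset
#   first = Timestamp("2000-01-01 00:30")
#   last = Timestamp("2000-01-01 03:10")
#   _get_timestamp_range_edges(first, last, to_offset("h"), unit="ns")
#   # -> (Timestamp('2000-01-01 00:00:00'), Timestamp('2000-01-01 04:00:00'))
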
def _get_period_range_edges(
    first: Period,
    last: Period,
    freq: BaseOffset,
    closed: Literal["right", "left"] = "left",
    origin: TimeGrouperOrigin = "start_day",
    offset: Timedelta | None = None,
) -> tuple[Period, Period]:
    """
    Adjust the provided `first` and `last` Periods to the respective Period of
    the given offset that encompasses them.

    Parameters
    ----------
    first : pd.Period
        The beginning Period of the range to be adjusted.
    last : pd.Period
        The ending Period of the range to be adjusted.
    freq : pd.DateOffset
        The freq to which the Periods will be adjusted.
    closed : {'right', 'left'}, default "left"
        Which side of bin interval is closed.
    origin : {'epoch', 'start', 'start_day'}, Timestamp, default 'start_day'
        The timestamp on which to adjust the grouping. The timezone of origin must
        match the timezone of the index.
        If a timestamp is not used, these values are also supported:

        - 'epoch': `origin` is 1970-01-01
        - 'start': `origin` is the first value of the timeseries
        - 'start_day': `origin` is the first day at midnight of the timeseries
    offset : pd.Timedelta, default is None
        An offset timedelta added to the origin.

    Returns
    -------
    A tuple of length 2, containing the adjusted pd.Period objects.
    """
    if not all(isinstance(obj, Period) for obj in [first, last]):
        raise TypeError("'first' and 'last' must be instances of type Period")

    # GH 23882
    first_ts = first.to_timestamp()
    last_ts = last.to_timestamp()
    adjust_first = not freq.is_on_offset(first_ts)
    adjust_last = freq.is_on_offset(last_ts)

    first_ts, last_ts = _get_timestamp_range_edges(
        first_ts, last_ts, freq, unit="ns", closed=closed, origin=origin, offset=offset
    )

    first = (first_ts + int(adjust_first) * freq).to_period(freq)
    last = (last_ts - int(adjust_last) * freq).to_period(freq)

    return first, last

def _insert_nat_bin(
    binner: PeriodIndex, bins: np.ndarray, labels: PeriodIndex, nat_count: int
) -> tuple[PeriodIndex, np.ndarray, PeriodIndex]:
    # NaT handling as in pandas._lib.lib.generate_bins_dt64()
    # shift bins by the number of NaT
    assert nat_count > 0
    bins += nat_count
    bins = np.insert(bins, 0, nat_count)

    # Incompatible types in assignment (expression has type "Index", variable
    # has type "PeriodIndex")
    binner = binner.insert(0, NaT)  # type: ignore[assignment]
    # Incompatible types in assignment (expression has type "Index", variable
    # has type "PeriodIndex")
    labels = labels.insert(0, NaT)  # type: ignore[assignment]
    return binner, bins, labels

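# Illustrative sketch (not part of the pandas source): a NaT bin is prepended
# and every existing bin boundary shifts right by the NaT count.
#
#   import numpy as np
#   from pandas import NaT, PeriodIndex
#   binner = labels = PeriodIndex(["2000-01", "2000-02"], freq="M")
#   _insert_nat_bin(binner, np.array([1, 2]), labels, nat_count=1)
#   # -> (PeriodIndex([NaT, 2000-01, 2000-02]), array([1, 2, 3]),
#   #     PeriodIndex([NaT, 2000-01, 2000-02]))
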
def _adjust_dates_anchored(
    first: Timestamp,
    last: Timestamp,
    freq: Tick,
    closed: Literal["right", "left"] = "right",
    origin: TimeGrouperOrigin = "start_day",
    offset: Timedelta | None = None,
    unit: str = "ns",
) -> tuple[Timestamp, Timestamp]:
    # First and last offsets should be calculated from the start day to fix an
    # error caused by resampling across multiple days when a one-day period is
    # not a multiple of the frequency. See GH 8683
    # To handle frequencies that are not multiples or divisors of a day, we
    # allow a fixed origin timestamp to be defined. See GH 31809
    first = first.as_unit(unit)
    last = last.as_unit(unit)
    if offset is not None:
        offset = offset.as_unit(unit)

    freq_value = Timedelta(freq).as_unit(unit)._value

    origin_timestamp = 0  # origin == "epoch"
    if origin == "start_day":
        origin_timestamp = first.normalize()._value
    elif origin == "start":
        origin_timestamp = first._value
    elif isinstance(origin, Timestamp):
        origin_timestamp = origin.as_unit(unit)._value
    elif origin in ["end", "end_day"]:
        origin_last = last if origin == "end" else last.ceil("D")
        sub_freq_times = (origin_last._value - first._value) // freq_value
        if closed == "left":
            sub_freq_times += 1
        first = origin_last - sub_freq_times * freq
        origin_timestamp = first._value
    origin_timestamp += offset._value if offset else 0

    # GH 10117 & GH 19375. If first and last contain timezone information,
    # perform the calculation in UTC in order to avoid localizing on an
    # ambiguous or nonexistent time.
    first_tzinfo = first.tzinfo
    last_tzinfo = last.tzinfo
    if first_tzinfo is not None:
        first = first.tz_convert("UTC")
    if last_tzinfo is not None:
        last = last.tz_convert("UTC")

    foffset = (first._value - origin_timestamp) % freq_value
    loffset = (last._value - origin_timestamp) % freq_value

    if closed == "right":
        if foffset > 0:
            # roll back
            fresult_int = first._value - foffset
        else:
            fresult_int = first._value - freq_value

        if loffset > 0:
            # roll forward
            lresult_int = last._value + (freq_value - loffset)
        else:
            # already the end of the road
            lresult_int = last._value
    else:  # closed == 'left'
        if foffset > 0:
            fresult_int = first._value - foffset
        else:
            # start of the road
            fresult_int = first._value

        if loffset > 0:
            # roll forward
            lresult_int = last._value + (freq_value - loffset)
        else:
            lresult_int = last._value + freq_value
    fresult = Timestamp(fresult_int, unit=unit)
    lresult = Timestamp(lresult_int, unit=unit)
    if first_tzinfo is not None:
        fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo)
    if last_tzinfo is not None:
        lresult = lresult.tz_localize("UTC").tz_convert(last_tzinfo)
    return fresult, lresult

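# Illustrative sketch (not part of the pandas source): how the anchoring above
# moves the first bin label for a frequency that does not divide a day, seen
# through the public API. The first labels shown are indicative and may vary
# across pandas versions.
#
#   import pandas as pd
#   ts = pd.Series(
#       range(9), index=pd.date_range("2000-10-01 23:30", periods=9, freq="17min")
#   )
#   ts.resample("17min").sum().index[0]                   # 23:14 (origin='start_day')
#   ts.resample("17min", origin="epoch").sum().index[0]   # 23:18
#   ts.resample("17min", origin="start").sum().index[0]   # 23:30
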
def asfreq(
    obj: NDFrameT,
    freq,
    method=None,
    how=None,
    normalize: bool = False,
    fill_value=None,
) -> NDFrameT:
    """
    Utility frequency conversion method for Series/DataFrame.

    See :meth:`pandas.NDFrame.asfreq` for full documentation.
    """
    if isinstance(obj.index, PeriodIndex):
        if method is not None:
            raise NotImplementedError("'method' argument is not supported")

        if how is None:
            how = "E"

        new_obj = obj.copy()
        new_obj.index = obj.index.asfreq(freq, how=how)

    elif len(obj.index) == 0:
        new_obj = obj.copy()
        new_obj.index = _asfreq_compat(obj.index, freq)
    else:
        dti = date_range(obj.index.min(), obj.index.max(), freq=freq)
        dti.name = obj.index.name
        new_obj = obj.reindex(dti, method=method, fill_value=fill_value)
        if normalize:
            new_obj.index = new_obj.index.normalize()

    return new_obj

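# Illustrative sketch (not part of the pandas source): the reindex branch
# above, reached through the public Series.asfreq API.
#
#   import pandas as pd
#   s = pd.Series([1.0, 2.0], index=pd.to_datetime(["2000-01-01", "2000-01-03"]))
#   s.asfreq("D")                  # inserts 2000-01-02 with NaN
#   s.asfreq("D", fill_value=0.0)  # inserts 2000-01-02 with 0.0
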
def _asfreq_compat(index: DatetimeIndex | PeriodIndex | TimedeltaIndex, freq):
    """
    Helper to mimic asfreq on (empty) DatetimeIndex and TimedeltaIndex.

    Parameters
    ----------
    index : PeriodIndex, DatetimeIndex, or TimedeltaIndex
    freq : DateOffset

    Returns
    -------
    same type as index
    """
    if len(index) != 0:
        # This should never be reached, always checked by the caller
        raise ValueError(
            "Can only set arbitrary freq for empty DatetimeIndex or TimedeltaIndex"
        )
    new_index: Index
    if isinstance(index, PeriodIndex):
        new_index = index.asfreq(freq=freq)
    elif isinstance(index, DatetimeIndex):
        new_index = DatetimeIndex([], dtype=index.dtype, freq=freq, name=index.name)
    elif isinstance(index, TimedeltaIndex):
        new_index = TimedeltaIndex([], dtype=index.dtype, freq=freq, name=index.name)
    else:  # pragma: no cover
        raise TypeError(type(index))
    return new_index

def maybe_warn_args_and_kwargs(cls, kernel: str, args, kwargs) -> None:
    """
    Warn for deprecation of args and kwargs in resample functions.

    Parameters
    ----------
    cls : type
        Class to warn about.
    kernel : str
        Operation name.
    args : tuple or None
        args passed by user. Will be None if and only if kernel does not have args.
    kwargs : dict or None
        kwargs passed by user. Will be None if and only if kernel does not have kwargs.
    """
    warn_args = args is not None and len(args) > 0
    warn_kwargs = kwargs is not None and len(kwargs) > 0
    if warn_args and warn_kwargs:
        msg = "args and kwargs"
    elif warn_args:
        msg = "args"
    elif warn_kwargs:
        msg = "kwargs"
    else:
        return
    warnings.warn(
        f"Passing additional {msg} to {cls.__name__}.{kernel} has "
        "no impact on the result and is deprecated. This will "
        "raise a TypeError in a future version of pandas.",
        category=FutureWarning,
        stacklevel=find_stack_level(),
    )

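# Illustrative sketch (not part of the pandas source): calling the helper
# directly with a non-empty args tuple emits the FutureWarning built above.
#
#   import warnings
#   with warnings.catch_warnings(record=True) as caught:
#       warnings.simplefilter("always")
#       maybe_warn_args_and_kwargs(DatetimeIndexResampler, "sum", (1,), None)
#   str(caught[0].message)
#   # 'Passing additional args to DatetimeIndexResampler.sum has no impact ...'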