frequencies.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619
  1. from __future__ import annotations
  2. import numpy as np
  3. from pandas._libs.algos import unique_deltas
  4. from pandas._libs.tslibs import (
  5. Timestamp,
  6. get_unit_from_dtype,
  7. periods_per_day,
  8. tz_convert_from_utc,
  9. )
  10. from pandas._libs.tslibs.ccalendar import (
  11. DAYS,
  12. MONTH_ALIASES,
  13. MONTH_NUMBERS,
  14. MONTHS,
  15. int_to_weekday,
  16. )
  17. from pandas._libs.tslibs.fields import (
  18. build_field_sarray,
  19. month_position_check,
  20. )
  21. from pandas._libs.tslibs.offsets import (
  22. DateOffset,
  23. Day,
  24. to_offset,
  25. )
  26. from pandas._libs.tslibs.parsing import get_rule_month
  27. from pandas._typing import npt
  28. from pandas.util._decorators import cache_readonly
  29. from pandas.core.dtypes.common import (
  30. is_datetime64_dtype,
  31. is_numeric_dtype,
  32. is_period_dtype,
  33. is_timedelta64_dtype,
  34. )
  35. from pandas.core.dtypes.generic import (
  36. ABCIndex,
  37. ABCSeries,
  38. )
  39. from pandas.core.algorithms import unique
  40. # ---------------------------------------------------------------------
  41. # Offset names ("time rules") and related functions
  42. _offset_to_period_map = {
  43. "WEEKDAY": "D",
  44. "EOM": "M",
  45. "BM": "M",
  46. "BQS": "Q",
  47. "QS": "Q",
  48. "BQ": "Q",
  49. "BA": "A",
  50. "AS": "A",
  51. "BAS": "A",
  52. "MS": "M",
  53. "D": "D",
  54. "C": "C",
  55. "B": "B",
  56. "T": "T",
  57. "S": "S",
  58. "L": "L",
  59. "U": "U",
  60. "N": "N",
  61. "H": "H",
  62. "Q": "Q",
  63. "A": "A",
  64. "W": "W",
  65. "M": "M",
  66. "Y": "A",
  67. "BY": "A",
  68. "YS": "A",
  69. "BYS": "A",
  70. }
  71. _need_suffix = ["QS", "BQ", "BQS", "YS", "AS", "BY", "BA", "BYS", "BAS"]
  72. for _prefix in _need_suffix:
  73. for _m in MONTHS:
  74. key = f"{_prefix}-{_m}"
  75. _offset_to_period_map[key] = _offset_to_period_map[_prefix]
  76. for _prefix in ["A", "Q"]:
  77. for _m in MONTHS:
  78. _alias = f"{_prefix}-{_m}"
  79. _offset_to_period_map[_alias] = _alias
  80. for _d in DAYS:
  81. _offset_to_period_map[f"W-{_d}"] = f"W-{_d}"
  82. def get_period_alias(offset_str: str) -> str | None:
  83. """
  84. Alias to closest period strings BQ->Q etc.
  85. """
  86. return _offset_to_period_map.get(offset_str, None)
  87. # ---------------------------------------------------------------------
  88. # Period codes
  89. def infer_freq(index) -> str | None:
  90. """
  91. Infer the most likely frequency given the input index.
  92. Parameters
  93. ----------
  94. index : DatetimeIndex or TimedeltaIndex
  95. If passed a Series will use the values of the series (NOT THE INDEX).
  96. Returns
  97. -------
  98. str or None
  99. None if no discernible frequency.
  100. Raises
  101. ------
  102. TypeError
  103. If the index is not datetime-like.
  104. ValueError
  105. If there are fewer than three values.
  106. Examples
  107. --------
  108. >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30)
  109. >>> pd.infer_freq(idx)
  110. 'D'
  111. """
  112. from pandas.core.api import (
  113. DatetimeIndex,
  114. Index,
  115. )
  116. if isinstance(index, ABCSeries):
  117. values = index._values
  118. if not (
  119. is_datetime64_dtype(values)
  120. or is_timedelta64_dtype(values)
  121. or values.dtype == object
  122. ):
  123. raise TypeError(
  124. "cannot infer freq from a non-convertible dtype "
  125. f"on a Series of {index.dtype}"
  126. )
  127. index = values
  128. inferer: _FrequencyInferer
  129. if not hasattr(index, "dtype"):
  130. pass
  131. elif is_period_dtype(index.dtype):
  132. raise TypeError(
  133. "PeriodIndex given. Check the `freq` attribute "
  134. "instead of using infer_freq."
  135. )
  136. elif is_timedelta64_dtype(index.dtype):
  137. # Allow TimedeltaIndex and TimedeltaArray
  138. inferer = _TimedeltaFrequencyInferer(index)
  139. return inferer.get_freq()
  140. if isinstance(index, Index) and not isinstance(index, DatetimeIndex):
  141. if is_numeric_dtype(index):
  142. raise TypeError(
  143. f"cannot infer freq from a non-convertible index of dtype {index.dtype}"
  144. )
  145. index = index._values
  146. if not isinstance(index, DatetimeIndex):
  147. index = DatetimeIndex(index)
  148. inferer = _FrequencyInferer(index)
  149. return inferer.get_freq()
  150. class _FrequencyInferer:
  151. """
  152. Not sure if I can avoid the state machine here
  153. """
  154. def __init__(self, index) -> None:
  155. self.index = index
  156. self.i8values = index.asi8
  157. # For get_unit_from_dtype we need the dtype to the underlying ndarray,
  158. # which for tz-aware is not the same as index.dtype
  159. if isinstance(index, ABCIndex):
  160. # error: Item "ndarray[Any, Any]" of "Union[ExtensionArray,
  161. # ndarray[Any, Any]]" has no attribute "_ndarray"
  162. self._creso = get_unit_from_dtype(
  163. index._data._ndarray.dtype # type: ignore[union-attr]
  164. )
  165. else:
  166. # otherwise we have DTA/TDA
  167. self._creso = get_unit_from_dtype(index._ndarray.dtype)
  168. # This moves the values, which are implicitly in UTC, to the
  169. # the timezone so they are in local time
  170. if hasattr(index, "tz"):
  171. if index.tz is not None:
  172. self.i8values = tz_convert_from_utc(
  173. self.i8values, index.tz, reso=self._creso
  174. )
  175. if len(index) < 3:
  176. raise ValueError("Need at least 3 dates to infer frequency")
  177. self.is_monotonic = (
  178. self.index._is_monotonic_increasing or self.index._is_monotonic_decreasing
  179. )
  180. @cache_readonly
  181. def deltas(self) -> npt.NDArray[np.int64]:
  182. return unique_deltas(self.i8values)
  183. @cache_readonly
  184. def deltas_asi8(self) -> npt.NDArray[np.int64]:
  185. # NB: we cannot use self.i8values here because we may have converted
  186. # the tz in __init__
  187. return unique_deltas(self.index.asi8)
  188. @cache_readonly
  189. def is_unique(self) -> bool:
  190. return len(self.deltas) == 1
  191. @cache_readonly
  192. def is_unique_asi8(self) -> bool:
  193. return len(self.deltas_asi8) == 1
  194. def get_freq(self) -> str | None:
  195. """
  196. Find the appropriate frequency string to describe the inferred
  197. frequency of self.i8values
  198. Returns
  199. -------
  200. str or None
  201. """
  202. if not self.is_monotonic or not self.index._is_unique:
  203. return None
  204. delta = self.deltas[0]
  205. ppd = periods_per_day(self._creso)
  206. if delta and _is_multiple(delta, ppd):
  207. return self._infer_daily_rule()
  208. # Business hourly, maybe. 17: one day / 65: one weekend
  209. if self.hour_deltas in ([1, 17], [1, 65], [1, 17, 65]):
  210. return "BH"
  211. # Possibly intraday frequency. Here we use the
  212. # original .asi8 values as the modified values
  213. # will not work around DST transitions. See #8772
  214. if not self.is_unique_asi8:
  215. return None
  216. delta = self.deltas_asi8[0]
  217. pph = ppd // 24
  218. ppm = pph // 60
  219. pps = ppm // 60
  220. if _is_multiple(delta, pph):
  221. # Hours
  222. return _maybe_add_count("H", delta / pph)
  223. elif _is_multiple(delta, ppm):
  224. # Minutes
  225. return _maybe_add_count("T", delta / ppm)
  226. elif _is_multiple(delta, pps):
  227. # Seconds
  228. return _maybe_add_count("S", delta / pps)
  229. elif _is_multiple(delta, (pps // 1000)):
  230. # Milliseconds
  231. return _maybe_add_count("L", delta / (pps // 1000))
  232. elif _is_multiple(delta, (pps // 1_000_000)):
  233. # Microseconds
  234. return _maybe_add_count("U", delta / (pps // 1_000_000))
  235. else:
  236. # Nanoseconds
  237. return _maybe_add_count("N", delta)
  238. @cache_readonly
  239. def day_deltas(self) -> list[int]:
  240. ppd = periods_per_day(self._creso)
  241. return [x / ppd for x in self.deltas]
  242. @cache_readonly
  243. def hour_deltas(self) -> list[int]:
  244. pph = periods_per_day(self._creso) // 24
  245. return [x / pph for x in self.deltas]
  246. @cache_readonly
  247. def fields(self) -> np.ndarray: # structured array of fields
  248. return build_field_sarray(self.i8values, reso=self._creso)
  249. @cache_readonly
  250. def rep_stamp(self) -> Timestamp:
  251. return Timestamp(self.i8values[0])
  252. def month_position_check(self) -> str | None:
  253. return month_position_check(self.fields, self.index.dayofweek)
  254. @cache_readonly
  255. def mdiffs(self) -> npt.NDArray[np.int64]:
  256. nmonths = self.fields["Y"] * 12 + self.fields["M"]
  257. return unique_deltas(nmonths.astype("i8"))
  258. @cache_readonly
  259. def ydiffs(self) -> npt.NDArray[np.int64]:
  260. return unique_deltas(self.fields["Y"].astype("i8"))
  261. def _infer_daily_rule(self) -> str | None:
  262. annual_rule = self._get_annual_rule()
  263. if annual_rule:
  264. nyears = self.ydiffs[0]
  265. month = MONTH_ALIASES[self.rep_stamp.month]
  266. alias = f"{annual_rule}-{month}"
  267. return _maybe_add_count(alias, nyears)
  268. quarterly_rule = self._get_quarterly_rule()
  269. if quarterly_rule:
  270. nquarters = self.mdiffs[0] / 3
  271. mod_dict = {0: 12, 2: 11, 1: 10}
  272. month = MONTH_ALIASES[mod_dict[self.rep_stamp.month % 3]]
  273. alias = f"{quarterly_rule}-{month}"
  274. return _maybe_add_count(alias, nquarters)
  275. monthly_rule = self._get_monthly_rule()
  276. if monthly_rule:
  277. return _maybe_add_count(monthly_rule, self.mdiffs[0])
  278. if self.is_unique:
  279. return self._get_daily_rule()
  280. if self._is_business_daily():
  281. return "B"
  282. wom_rule = self._get_wom_rule()
  283. if wom_rule:
  284. return wom_rule
  285. return None
  286. def _get_daily_rule(self) -> str | None:
  287. ppd = periods_per_day(self._creso)
  288. days = self.deltas[0] / ppd
  289. if days % 7 == 0:
  290. # Weekly
  291. wd = int_to_weekday[self.rep_stamp.weekday()]
  292. alias = f"W-{wd}"
  293. return _maybe_add_count(alias, days / 7)
  294. else:
  295. return _maybe_add_count("D", days)
  296. def _get_annual_rule(self) -> str | None:
  297. if len(self.ydiffs) > 1:
  298. return None
  299. if len(unique(self.fields["M"])) > 1:
  300. return None
  301. pos_check = self.month_position_check()
  302. if pos_check is None:
  303. return None
  304. else:
  305. return {"cs": "AS", "bs": "BAS", "ce": "A", "be": "BA"}.get(pos_check)
  306. def _get_quarterly_rule(self) -> str | None:
  307. if len(self.mdiffs) > 1:
  308. return None
  309. if not self.mdiffs[0] % 3 == 0:
  310. return None
  311. pos_check = self.month_position_check()
  312. if pos_check is None:
  313. return None
  314. else:
  315. return {"cs": "QS", "bs": "BQS", "ce": "Q", "be": "BQ"}.get(pos_check)
  316. def _get_monthly_rule(self) -> str | None:
  317. if len(self.mdiffs) > 1:
  318. return None
  319. pos_check = self.month_position_check()
  320. if pos_check is None:
  321. return None
  322. else:
  323. return {"cs": "MS", "bs": "BMS", "ce": "M", "be": "BM"}.get(pos_check)
  324. def _is_business_daily(self) -> bool:
  325. # quick check: cannot be business daily
  326. if self.day_deltas != [1, 3]:
  327. return False
  328. # probably business daily, but need to confirm
  329. first_weekday = self.index[0].weekday()
  330. shifts = np.diff(self.i8values)
  331. ppd = periods_per_day(self._creso)
  332. shifts = np.floor_divide(shifts, ppd)
  333. weekdays = np.mod(first_weekday + np.cumsum(shifts), 7)
  334. return bool(
  335. np.all(
  336. ((weekdays == 0) & (shifts == 3))
  337. | ((weekdays > 0) & (weekdays <= 4) & (shifts == 1))
  338. )
  339. )
  340. def _get_wom_rule(self) -> str | None:
  341. weekdays = unique(self.index.weekday)
  342. if len(weekdays) > 1:
  343. return None
  344. week_of_months = unique((self.index.day - 1) // 7)
  345. # Only attempt to infer up to WOM-4. See #9425
  346. week_of_months = week_of_months[week_of_months < 4]
  347. if len(week_of_months) == 0 or len(week_of_months) > 1:
  348. return None
  349. # get which week
  350. week = week_of_months[0] + 1
  351. wd = int_to_weekday[weekdays[0]]
  352. return f"WOM-{week}{wd}"
  353. class _TimedeltaFrequencyInferer(_FrequencyInferer):
  354. def _infer_daily_rule(self):
  355. if self.is_unique:
  356. return self._get_daily_rule()
  357. def _is_multiple(us, mult: int) -> bool:
  358. return us % mult == 0
  359. def _maybe_add_count(base: str, count: float) -> str:
  360. if count != 1:
  361. assert count == int(count)
  362. count = int(count)
  363. return f"{count}{base}"
  364. else:
  365. return base
  366. # ----------------------------------------------------------------------
  367. # Frequency comparison
  368. def is_subperiod(source, target) -> bool:
  369. """
  370. Returns True if downsampling is possible between source and target
  371. frequencies
  372. Parameters
  373. ----------
  374. source : str or DateOffset
  375. Frequency converting from
  376. target : str or DateOffset
  377. Frequency converting to
  378. Returns
  379. -------
  380. bool
  381. """
  382. if target is None or source is None:
  383. return False
  384. source = _maybe_coerce_freq(source)
  385. target = _maybe_coerce_freq(target)
  386. if _is_annual(target):
  387. if _is_quarterly(source):
  388. return _quarter_months_conform(
  389. get_rule_month(source), get_rule_month(target)
  390. )
  391. return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  392. elif _is_quarterly(target):
  393. return source in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  394. elif _is_monthly(target):
  395. return source in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  396. elif _is_weekly(target):
  397. return source in {target, "D", "C", "B", "H", "T", "S", "L", "U", "N"}
  398. elif target == "B":
  399. return source in {"B", "H", "T", "S", "L", "U", "N"}
  400. elif target == "C":
  401. return source in {"C", "H", "T", "S", "L", "U", "N"}
  402. elif target == "D":
  403. return source in {"D", "H", "T", "S", "L", "U", "N"}
  404. elif target == "H":
  405. return source in {"H", "T", "S", "L", "U", "N"}
  406. elif target == "T":
  407. return source in {"T", "S", "L", "U", "N"}
  408. elif target == "S":
  409. return source in {"S", "L", "U", "N"}
  410. elif target == "L":
  411. return source in {"L", "U", "N"}
  412. elif target == "U":
  413. return source in {"U", "N"}
  414. elif target == "N":
  415. return source in {"N"}
  416. else:
  417. return False
  418. def is_superperiod(source, target) -> bool:
  419. """
  420. Returns True if upsampling is possible between source and target
  421. frequencies
  422. Parameters
  423. ----------
  424. source : str or DateOffset
  425. Frequency converting from
  426. target : str or DateOffset
  427. Frequency converting to
  428. Returns
  429. -------
  430. bool
  431. """
  432. if target is None or source is None:
  433. return False
  434. source = _maybe_coerce_freq(source)
  435. target = _maybe_coerce_freq(target)
  436. if _is_annual(source):
  437. if _is_annual(target):
  438. return get_rule_month(source) == get_rule_month(target)
  439. if _is_quarterly(target):
  440. smonth = get_rule_month(source)
  441. tmonth = get_rule_month(target)
  442. return _quarter_months_conform(smonth, tmonth)
  443. return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  444. elif _is_quarterly(source):
  445. return target in {"D", "C", "B", "M", "H", "T", "S", "L", "U", "N"}
  446. elif _is_monthly(source):
  447. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  448. elif _is_weekly(source):
  449. return target in {source, "D", "C", "B", "H", "T", "S", "L", "U", "N"}
  450. elif source == "B":
  451. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  452. elif source == "C":
  453. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  454. elif source == "D":
  455. return target in {"D", "C", "B", "H", "T", "S", "L", "U", "N"}
  456. elif source == "H":
  457. return target in {"H", "T", "S", "L", "U", "N"}
  458. elif source == "T":
  459. return target in {"T", "S", "L", "U", "N"}
  460. elif source == "S":
  461. return target in {"S", "L", "U", "N"}
  462. elif source == "L":
  463. return target in {"L", "U", "N"}
  464. elif source == "U":
  465. return target in {"U", "N"}
  466. elif source == "N":
  467. return target in {"N"}
  468. else:
  469. return False
  470. def _maybe_coerce_freq(code) -> str:
  471. """we might need to coerce a code to a rule_code
  472. and uppercase it
  473. Parameters
  474. ----------
  475. source : str or DateOffset
  476. Frequency converting from
  477. Returns
  478. -------
  479. str
  480. """
  481. assert code is not None
  482. if isinstance(code, DateOffset):
  483. code = code.rule_code
  484. return code.upper()
  485. def _quarter_months_conform(source: str, target: str) -> bool:
  486. snum = MONTH_NUMBERS[source]
  487. tnum = MONTH_NUMBERS[target]
  488. return snum % 3 == tnum % 3
  489. def _is_annual(rule: str) -> bool:
  490. rule = rule.upper()
  491. return rule == "A" or rule.startswith("A-")
  492. def _is_quarterly(rule: str) -> bool:
  493. rule = rule.upper()
  494. return rule == "Q" or rule.startswith("Q-") or rule.startswith("BQ")
  495. def _is_monthly(rule: str) -> bool:
  496. rule = rule.upper()
  497. return rule in ("M", "BM")
  498. def _is_weekly(rule: str) -> bool:
  499. rule = rule.upper()
  500. return rule == "W" or rule.startswith("W-")
# Names exported by ``from <this module> import *``.  ``Day`` and
# ``to_offset`` are imported at the top of the file and re-exported here.
__all__ = [
    "Day",
    "get_period_alias",
    "infer_freq",
    "is_subperiod",
    "is_superperiod",
    "to_offset",
]