holiday.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609
  1. from __future__ import annotations
  2. from datetime import (
  3. datetime,
  4. timedelta,
  5. )
  6. import warnings
  7. from dateutil.relativedelta import (
  8. FR,
  9. MO,
  10. SA,
  11. SU,
  12. TH,
  13. TU,
  14. WE,
  15. )
  16. import numpy as np
  17. from pandas.errors import PerformanceWarning
  18. from pandas import (
  19. DateOffset,
  20. DatetimeIndex,
  21. Series,
  22. Timestamp,
  23. concat,
  24. date_range,
  25. )
  26. from pandas.tseries.offsets import (
  27. Day,
  28. Easter,
  29. )
  30. def next_monday(dt: datetime) -> datetime:
  31. """
  32. If holiday falls on Saturday, use following Monday instead;
  33. if holiday falls on Sunday, use Monday instead
  34. """
  35. if dt.weekday() == 5:
  36. return dt + timedelta(2)
  37. elif dt.weekday() == 6:
  38. return dt + timedelta(1)
  39. return dt
  40. def next_monday_or_tuesday(dt: datetime) -> datetime:
  41. """
  42. For second holiday of two adjacent ones!
  43. If holiday falls on Saturday, use following Monday instead;
  44. if holiday falls on Sunday or Monday, use following Tuesday instead
  45. (because Monday is already taken by adjacent holiday on the day before)
  46. """
  47. dow = dt.weekday()
  48. if dow in (5, 6):
  49. return dt + timedelta(2)
  50. if dow == 0:
  51. return dt + timedelta(1)
  52. return dt
  53. def previous_friday(dt: datetime) -> datetime:
  54. """
  55. If holiday falls on Saturday or Sunday, use previous Friday instead.
  56. """
  57. if dt.weekday() == 5:
  58. return dt - timedelta(1)
  59. elif dt.weekday() == 6:
  60. return dt - timedelta(2)
  61. return dt
  62. def sunday_to_monday(dt: datetime) -> datetime:
  63. """
  64. If holiday falls on Sunday, use day thereafter (Monday) instead.
  65. """
  66. if dt.weekday() == 6:
  67. return dt + timedelta(1)
  68. return dt
  69. def weekend_to_monday(dt: datetime) -> datetime:
  70. """
  71. If holiday falls on Sunday or Saturday,
  72. use day thereafter (Monday) instead.
  73. Needed for holidays such as Christmas observation in Europe
  74. """
  75. if dt.weekday() == 6:
  76. return dt + timedelta(1)
  77. elif dt.weekday() == 5:
  78. return dt + timedelta(2)
  79. return dt
  80. def nearest_workday(dt: datetime) -> datetime:
  81. """
  82. If holiday falls on Saturday, use day before (Friday) instead;
  83. if holiday falls on Sunday, use day thereafter (Monday) instead.
  84. """
  85. if dt.weekday() == 5:
  86. return dt - timedelta(1)
  87. elif dt.weekday() == 6:
  88. return dt + timedelta(1)
  89. return dt
  90. def next_workday(dt: datetime) -> datetime:
  91. """
  92. returns next weekday used for observances
  93. """
  94. dt += timedelta(days=1)
  95. while dt.weekday() > 4:
  96. # Mon-Fri are 0-4
  97. dt += timedelta(days=1)
  98. return dt
  99. def previous_workday(dt: datetime) -> datetime:
  100. """
  101. returns previous weekday used for observances
  102. """
  103. dt -= timedelta(days=1)
  104. while dt.weekday() > 4:
  105. # Mon-Fri are 0-4
  106. dt -= timedelta(days=1)
  107. return dt
  108. def before_nearest_workday(dt: datetime) -> datetime:
  109. """
  110. returns previous workday after nearest workday
  111. """
  112. return previous_workday(nearest_workday(dt))
  113. def after_nearest_workday(dt: datetime) -> datetime:
  114. """
  115. returns next workday after nearest workday
  116. needed for Boxing day or multiple holidays in a series
  117. """
  118. return next_workday(nearest_workday(dt))
  119. class Holiday:
  120. """
  121. Class that defines a holiday with start/end dates and rules
  122. for observance.
  123. """
  124. def __init__(
  125. self,
  126. name,
  127. year=None,
  128. month=None,
  129. day=None,
  130. offset=None,
  131. observance=None,
  132. start_date=None,
  133. end_date=None,
  134. days_of_week=None,
  135. ) -> None:
  136. """
  137. Parameters
  138. ----------
  139. name : str
  140. Name of the holiday , defaults to class name
  141. offset : array of pandas.tseries.offsets or
  142. class from pandas.tseries.offsets
  143. computes offset from date
  144. observance: function
  145. computes when holiday is given a pandas Timestamp
  146. days_of_week:
  147. provide a tuple of days e.g (0,1,2,3,) for Monday Through Thursday
  148. Monday=0,..,Sunday=6
  149. Examples
  150. --------
  151. >>> from dateutil.relativedelta import MO
  152. >>> USMemorialDay = pd.tseries.holiday.Holiday(
  153. ... "Memorial Day", month=5, day=31, offset=pd.DateOffset(weekday=MO(-1))
  154. ... )
  155. >>> USMemorialDay
  156. Holiday: Memorial Day (month=5, day=31, offset=<DateOffset: weekday=MO(-1)>)
  157. >>> USLaborDay = pd.tseries.holiday.Holiday(
  158. ... "Labor Day", month=9, day=1, offset=pd.DateOffset(weekday=MO(1))
  159. ... )
  160. >>> USLaborDay
  161. Holiday: Labor Day (month=9, day=1, offset=<DateOffset: weekday=MO(+1)>)
  162. >>> July3rd = pd.tseries.holiday.Holiday("July 3rd", month=7, day=3)
  163. >>> July3rd
  164. Holiday: July 3rd (month=7, day=3, )
  165. >>> NewYears = pd.tseries.holiday.Holiday(
  166. ... "New Years Day", month=1, day=1,
  167. ... observance=pd.tseries.holiday.nearest_workday
  168. ... )
  169. >>> NewYears # doctest: +SKIP
  170. Holiday: New Years Day (
  171. month=1, day=1, observance=<function nearest_workday at 0x66545e9bc440>
  172. )
  173. >>> July3rd = pd.tseries.holiday.Holiday(
  174. ... "July 3rd", month=7, day=3,
  175. ... days_of_week=(0, 1, 2, 3)
  176. ... )
  177. >>> July3rd
  178. Holiday: July 3rd (month=7, day=3, )
  179. """
  180. if offset is not None and observance is not None:
  181. raise NotImplementedError("Cannot use both offset and observance.")
  182. self.name = name
  183. self.year = year
  184. self.month = month
  185. self.day = day
  186. self.offset = offset
  187. self.start_date = (
  188. Timestamp(start_date) if start_date is not None else start_date
  189. )
  190. self.end_date = Timestamp(end_date) if end_date is not None else end_date
  191. self.observance = observance
  192. assert days_of_week is None or type(days_of_week) == tuple
  193. self.days_of_week = days_of_week
  194. def __repr__(self) -> str:
  195. info = ""
  196. if self.year is not None:
  197. info += f"year={self.year}, "
  198. info += f"month={self.month}, day={self.day}, "
  199. if self.offset is not None:
  200. info += f"offset={self.offset}"
  201. if self.observance is not None:
  202. info += f"observance={self.observance}"
  203. repr = f"Holiday: {self.name} ({info})"
  204. return repr
  205. def dates(self, start_date, end_date, return_name: bool = False):
  206. """
  207. Calculate holidays observed between start date and end date
  208. Parameters
  209. ----------
  210. start_date : starting date, datetime-like, optional
  211. end_date : ending date, datetime-like, optional
  212. return_name : bool, optional, default=False
  213. If True, return a series that has dates and holiday names.
  214. False will only return dates.
  215. """
  216. start_date = Timestamp(start_date)
  217. end_date = Timestamp(end_date)
  218. filter_start_date = start_date
  219. filter_end_date = end_date
  220. if self.year is not None:
  221. dt = Timestamp(datetime(self.year, self.month, self.day))
  222. if return_name:
  223. return Series(self.name, index=[dt])
  224. else:
  225. return [dt]
  226. dates = self._reference_dates(start_date, end_date)
  227. holiday_dates = self._apply_rule(dates)
  228. if self.days_of_week is not None:
  229. holiday_dates = holiday_dates[
  230. np.in1d(holiday_dates.dayofweek, self.days_of_week)
  231. ]
  232. if self.start_date is not None:
  233. filter_start_date = max(
  234. self.start_date.tz_localize(filter_start_date.tz), filter_start_date
  235. )
  236. if self.end_date is not None:
  237. filter_end_date = min(
  238. self.end_date.tz_localize(filter_end_date.tz), filter_end_date
  239. )
  240. holiday_dates = holiday_dates[
  241. (holiday_dates >= filter_start_date) & (holiday_dates <= filter_end_date)
  242. ]
  243. if return_name:
  244. return Series(self.name, index=holiday_dates)
  245. return holiday_dates
  246. def _reference_dates(self, start_date, end_date):
  247. """
  248. Get reference dates for the holiday.
  249. Return reference dates for the holiday also returning the year
  250. prior to the start_date and year following the end_date. This ensures
  251. that any offsets to be applied will yield the holidays within
  252. the passed in dates.
  253. """
  254. if self.start_date is not None:
  255. start_date = self.start_date.tz_localize(start_date.tz)
  256. if self.end_date is not None:
  257. end_date = self.end_date.tz_localize(start_date.tz)
  258. year_offset = DateOffset(years=1)
  259. reference_start_date = Timestamp(
  260. datetime(start_date.year - 1, self.month, self.day)
  261. )
  262. reference_end_date = Timestamp(
  263. datetime(end_date.year + 1, self.month, self.day)
  264. )
  265. # Don't process unnecessary holidays
  266. dates = date_range(
  267. start=reference_start_date,
  268. end=reference_end_date,
  269. freq=year_offset,
  270. tz=start_date.tz,
  271. )
  272. return dates
  273. def _apply_rule(self, dates):
  274. """
  275. Apply the given offset/observance to a DatetimeIndex of dates.
  276. Parameters
  277. ----------
  278. dates : DatetimeIndex
  279. Dates to apply the given offset/observance rule
  280. Returns
  281. -------
  282. Dates with rules applied
  283. """
  284. if dates.empty:
  285. return DatetimeIndex([])
  286. if self.observance is not None:
  287. return dates.map(lambda d: self.observance(d))
  288. if self.offset is not None:
  289. if not isinstance(self.offset, list):
  290. offsets = [self.offset]
  291. else:
  292. offsets = self.offset
  293. for offset in offsets:
  294. # if we are adding a non-vectorized value
  295. # ignore the PerformanceWarnings:
  296. with warnings.catch_warnings():
  297. warnings.simplefilter("ignore", PerformanceWarning)
  298. dates += offset
  299. return dates
  300. holiday_calendars = {}
  301. def register(cls) -> None:
  302. try:
  303. name = cls.name
  304. except AttributeError:
  305. name = cls.__name__
  306. holiday_calendars[name] = cls
  307. def get_calendar(name):
  308. """
  309. Return an instance of a calendar based on its name.
  310. Parameters
  311. ----------
  312. name : str
  313. Calendar name to return an instance of
  314. """
  315. return holiday_calendars[name]()
  316. class HolidayCalendarMetaClass(type):
  317. def __new__(cls, clsname, bases, attrs):
  318. calendar_class = super().__new__(cls, clsname, bases, attrs)
  319. register(calendar_class)
  320. return calendar_class
  321. class AbstractHolidayCalendar(metaclass=HolidayCalendarMetaClass):
  322. """
  323. Abstract interface to create holidays following certain rules.
  324. """
  325. rules: list[Holiday] = []
  326. start_date = Timestamp(datetime(1970, 1, 1))
  327. end_date = Timestamp(datetime(2200, 12, 31))
  328. _cache = None
  329. def __init__(self, name=None, rules=None) -> None:
  330. """
  331. Initializes holiday object with a given set a rules. Normally
  332. classes just have the rules defined within them.
  333. Parameters
  334. ----------
  335. name : str
  336. Name of the holiday calendar, defaults to class name
  337. rules : array of Holiday objects
  338. A set of rules used to create the holidays.
  339. """
  340. super().__init__()
  341. if name is None:
  342. name = type(self).__name__
  343. self.name = name
  344. if rules is not None:
  345. self.rules = rules
  346. def rule_from_name(self, name):
  347. for rule in self.rules:
  348. if rule.name == name:
  349. return rule
  350. return None
  351. def holidays(self, start=None, end=None, return_name: bool = False):
  352. """
  353. Returns a curve with holidays between start_date and end_date
  354. Parameters
  355. ----------
  356. start : starting date, datetime-like, optional
  357. end : ending date, datetime-like, optional
  358. return_name : bool, optional
  359. If True, return a series that has dates and holiday names.
  360. False will only return a DatetimeIndex of dates.
  361. Returns
  362. -------
  363. DatetimeIndex of holidays
  364. """
  365. if self.rules is None:
  366. raise Exception(
  367. f"Holiday Calendar {self.name} does not have any rules specified"
  368. )
  369. if start is None:
  370. start = AbstractHolidayCalendar.start_date
  371. if end is None:
  372. end = AbstractHolidayCalendar.end_date
  373. start = Timestamp(start)
  374. end = Timestamp(end)
  375. # If we don't have a cache or the dates are outside the prior cache, we
  376. # get them again
  377. if self._cache is None or start < self._cache[0] or end > self._cache[1]:
  378. pre_holidays = [
  379. rule.dates(start, end, return_name=True) for rule in self.rules
  380. ]
  381. if pre_holidays:
  382. holidays = concat(pre_holidays)
  383. else:
  384. holidays = Series(index=DatetimeIndex([]), dtype=object)
  385. self._cache = (start, end, holidays.sort_index())
  386. holidays = self._cache[2]
  387. holidays = holidays[start:end]
  388. if return_name:
  389. return holidays
  390. else:
  391. return holidays.index
  392. @staticmethod
  393. def merge_class(base, other):
  394. """
  395. Merge holiday calendars together. The base calendar
  396. will take precedence to other. The merge will be done
  397. based on each holiday's name.
  398. Parameters
  399. ----------
  400. base : AbstractHolidayCalendar
  401. instance/subclass or array of Holiday objects
  402. other : AbstractHolidayCalendar
  403. instance/subclass or array of Holiday objects
  404. """
  405. try:
  406. other = other.rules
  407. except AttributeError:
  408. pass
  409. if not isinstance(other, list):
  410. other = [other]
  411. other_holidays = {holiday.name: holiday for holiday in other}
  412. try:
  413. base = base.rules
  414. except AttributeError:
  415. pass
  416. if not isinstance(base, list):
  417. base = [base]
  418. base_holidays = {holiday.name: holiday for holiday in base}
  419. other_holidays.update(base_holidays)
  420. return list(other_holidays.values())
  421. def merge(self, other, inplace: bool = False):
  422. """
  423. Merge holiday calendars together. The caller's class
  424. rules take precedence. The merge will be done
  425. based on each holiday's name.
  426. Parameters
  427. ----------
  428. other : holiday calendar
  429. inplace : bool (default=False)
  430. If True set rule_table to holidays, else return array of Holidays
  431. """
  432. holidays = self.merge_class(self, other)
  433. if inplace:
  434. self.rules = holidays
  435. else:
  436. return holidays
  437. USMemorialDay = Holiday(
  438. "Memorial Day", month=5, day=31, offset=DateOffset(weekday=MO(-1))
  439. )
  440. USLaborDay = Holiday("Labor Day", month=9, day=1, offset=DateOffset(weekday=MO(1)))
  441. USColumbusDay = Holiday(
  442. "Columbus Day", month=10, day=1, offset=DateOffset(weekday=MO(2))
  443. )
  444. USThanksgivingDay = Holiday(
  445. "Thanksgiving Day", month=11, day=1, offset=DateOffset(weekday=TH(4))
  446. )
  447. USMartinLutherKingJr = Holiday(
  448. "Birthday of Martin Luther King, Jr.",
  449. start_date=datetime(1986, 1, 1),
  450. month=1,
  451. day=1,
  452. offset=DateOffset(weekday=MO(3)),
  453. )
  454. USPresidentsDay = Holiday(
  455. "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))
  456. )
  457. GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
  458. EasterMonday = Holiday("Easter Monday", month=1, day=1, offset=[Easter(), Day(1)])
  459. class USFederalHolidayCalendar(AbstractHolidayCalendar):
  460. """
  461. US Federal Government Holiday Calendar based on rules specified by:
  462. https://www.opm.gov/policy-data-oversight/pay-leave/federal-holidays/
  463. """
  464. rules = [
  465. Holiday("New Year's Day", month=1, day=1, observance=nearest_workday),
  466. USMartinLutherKingJr,
  467. USPresidentsDay,
  468. USMemorialDay,
  469. Holiday(
  470. "Juneteenth National Independence Day",
  471. month=6,
  472. day=19,
  473. start_date="2021-06-18",
  474. observance=nearest_workday,
  475. ),
  476. Holiday("Independence Day", month=7, day=4, observance=nearest_workday),
  477. USLaborDay,
  478. USColumbusDay,
  479. Holiday("Veterans Day", month=11, day=11, observance=nearest_workday),
  480. USThanksgivingDay,
  481. Holiday("Christmas Day", month=12, day=25, observance=nearest_workday),
  482. ]
  483. def HolidayCalendarFactory(name, base, other, base_class=AbstractHolidayCalendar):
  484. rules = AbstractHolidayCalendar.merge_class(base, other)
  485. calendar_class = type(name, (base_class,), {"rules": rules, "name": name})
  486. return calendar_class
  487. __all__ = [
  488. "after_nearest_workday",
  489. "before_nearest_workday",
  490. "FR",
  491. "get_calendar",
  492. "HolidayCalendarFactory",
  493. "MO",
  494. "nearest_workday",
  495. "next_monday",
  496. "next_monday_or_tuesday",
  497. "next_workday",
  498. "previous_friday",
  499. "previous_workday",
  500. "register",
  501. "SA",
  502. "SU",
  503. "sunday_to_monday",
  504. "TH",
  505. "TU",
  506. "WE",
  507. "weekend_to_monday",
  508. ]