period.pyx 80 KB


  1. import re
  2. cimport numpy as cnp
  3. from cpython.object cimport (
  4. Py_EQ,
  5. Py_NE,
  6. PyObject,
  7. PyObject_RichCompare,
  8. PyObject_RichCompareBool,
  9. )
  10. from numpy cimport (
  11. int32_t,
  12. int64_t,
  13. ndarray,
  14. )
  15. import numpy as np
  16. cnp.import_array()
  17. cimport cython
  18. from cpython.datetime cimport (
  19. PyDate_Check,
  20. PyDateTime_Check,
  21. datetime,
  22. import_datetime,
  23. )
  24. from libc.stdlib cimport (
  25. free,
  26. malloc,
  27. )
  28. from libc.string cimport (
  29. memset,
  30. strlen,
  31. )
  32. from libc.time cimport (
  33. strftime,
  34. tm,
  35. )
  36. # import datetime C API
  37. import_datetime()
  38. cimport pandas._libs.tslibs.util as util
  39. from pandas._libs.missing cimport C_NA
  40. from pandas._libs.tslibs.np_datetime cimport (
  41. NPY_DATETIMEUNIT,
  42. NPY_FR_D,
  43. astype_overflowsafe,
  44. check_dts_bounds,
  45. get_timedelta64_value,
  46. npy_datetimestruct,
  47. npy_datetimestruct_to_datetime,
  48. pandas_datetime_to_datetimestruct,
  49. )
  50. from pandas._libs.tslibs.timestamps import Timestamp
  51. from pandas._libs.tslibs.ccalendar cimport (
  52. dayofweek,
  53. get_day_of_year,
  54. get_days_in_month,
  55. get_week_of_year,
  56. is_leapyear,
  57. )
  58. from pandas._libs.tslibs.timedeltas cimport (
  59. delta_to_nanoseconds,
  60. is_any_td_scalar,
  61. )
  62. from pandas._libs.tslibs.conversion import DT64NS_DTYPE
  63. from pandas._libs.tslibs.dtypes cimport (
  64. FR_ANN,
  65. FR_BUS,
  66. FR_DAY,
  67. FR_HR,
  68. FR_MIN,
  69. FR_MS,
  70. FR_MTH,
  71. FR_NS,
  72. FR_QTR,
  73. FR_SEC,
  74. FR_UND,
  75. FR_US,
  76. FR_WK,
  77. PeriodDtypeBase,
  78. attrname_to_abbrevs,
  79. freq_group_code_to_npy_unit,
  80. )
  81. from pandas._libs.tslibs.parsing cimport quarter_to_myear
  82. from pandas._libs.tslibs.parsing import parse_datetime_string_with_reso
  83. from pandas._libs.tslibs.nattype cimport (
  84. NPY_NAT,
  85. c_NaT as NaT,
  86. c_nat_strings as nat_strings,
  87. checknull_with_nat,
  88. )
  89. from pandas._libs.tslibs.offsets cimport (
  90. BaseOffset,
  91. is_offset_object,
  92. is_tick_object,
  93. to_offset,
  94. )
  95. from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG
  # Sentinel ordinal handed back by ``nofunc`` when no conversion routine
  # exists for a pair of frequencies.
  cdef enum:
      INT32_MIN = -2_147_483_648LL


  # Parameters shared by every ordinal-conversion routine; filled in by
  # ``get_asfreq_info``.
  ctypedef struct asfreq_info:
      # multiplier/divisor between the two (daily or finer) resolutions
      int64_t intraday_conversion_factor
      # 1 to align to the end of the source period, 0 for its start
      int is_end
      # anchor of the target frequency (week-end offset or year-end month)
      int to_end
      # anchor of the source frequency (week-end offset or year-end month)
      int from_end


  # Signature shared by all conversion routines: (ordinal, info) -> ordinal
  ctypedef int64_t (*freq_conv_func)(int64_t, asfreq_info*) nogil
  # Upper-triangular table of multipliers between the daily-and-finer
  # resolutions.  ``get_daytime_conversion_factor`` indexes it with
  # (frequency group index - 6), smaller index first.
  cdef extern from *:
      """
      // must use npy typedef b/c int64_t is aliased in cython-generated c
      // unclear why we need LL for that row.
      // see https://github.com/pandas-dev/pandas/pull/34416/
      static npy_int64 daytime_conversion_factor_matrix[7][7] = {
          {1, 24, 1440, 86400, 86400000, 86400000000, 86400000000000},
          {0LL, 1LL, 60LL, 3600LL, 3600000LL, 3600000000LL, 3600000000000LL},
          {0, 0, 1, 60, 60000, 60000000, 60000000000},
          {0, 0, 0, 1, 1000, 1000000, 1000000000},
          {0, 0, 0, 0, 1, 1000, 1000000},
          {0, 0, 0, 0, 0, 1, 1000},
          {0, 0, 0, 0, 0, 0, 1}};
      """
      int64_t daytime_conversion_factor_matrix[7][7]
  cdef int max_value(int left, int right) nogil:
      """Return the larger of the two integers."""
      return left if left > right else right
  cdef int min_value(int left, int right) nogil:
      """Return the smaller of the two integers."""
      return left if left < right else right
  cdef int64_t get_daytime_conversion_factor(int from_index, int to_index) nogil:
      """
      Look up the multiplier between two frequency group indexes.

      The arguments are order-insensitive: the smaller index selects the row
      and the larger one the column of ``daytime_conversion_factor_matrix``.

      Returns
      -------
      int64_t
          0 when either index is below 6 (a frequency coarser than daily,
          which does not use the daytime conversion factors).
      """
      cdef:
          int lower = min_value(from_index, to_index)
          int upper = max_value(from_index, to_index)

      # Indexes below 6 are strictly coarser than Daily and have no entry
      # in the matrix.
      if lower < 6 or upper < 6:
          return 0
      return daytime_conversion_factor_matrix[lower - 6][upper - 6]
  cdef int64_t nofunc(int64_t ordinal, asfreq_info *af_info) nogil:
      """Placeholder converter: ignores its arguments and returns INT32_MIN."""
      return INT32_MIN
  cdef int64_t no_op(int64_t ordinal, asfreq_info *af_info) nogil:
      """Identity converter: hand the ordinal back unchanged."""
      return ordinal
  cdef freq_conv_func get_asfreq_func(int from_freq, int to_freq) nogil:
      """
      Pick the routine that converts ordinals from one frequency to another.

      Parameters
      ----------
      from_freq : int
      to_freq : int

      Returns
      -------
      freq_conv_func
          ``nofunc`` when the pair of frequency groups is not supported.
      """
      cdef:
          int src = get_freq_group(from_freq)
          int dst = get_freq_group(to_freq)

      if src == FR_UND:
          # an undefined source frequency is handled like daily
          src = FR_DAY

      # Every branch below returns, so the checks are written as a flat
      # sequence of guards; their order matters (business-day first).

      # ---- from business day
      if src == FR_BUS:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_BtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_BtoQ
          if dst == FR_MTH:
              return <freq_conv_func>asfreq_BtoM
          if dst == FR_WK:
              return <freq_conv_func>asfreq_BtoW
          if dst == FR_BUS:
              return <freq_conv_func>no_op
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_BtoDT
          return <freq_conv_func>nofunc

      # ---- to business day
      if dst == FR_BUS:
          if src == FR_ANN:
              return <freq_conv_func>asfreq_AtoB
          if src == FR_QTR:
              return <freq_conv_func>asfreq_QtoB
          if src == FR_MTH:
              return <freq_conv_func>asfreq_MtoB
          if src == FR_WK:
              return <freq_conv_func>asfreq_WtoB
          if src in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_DTtoB
          return <freq_conv_func>nofunc

      # ---- from annual
      if src == FR_ANN:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_AtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_AtoQ
          if dst == FR_MTH:
              return <freq_conv_func>asfreq_AtoM
          if dst == FR_WK:
              return <freq_conv_func>asfreq_AtoW
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_AtoDT
          return <freq_conv_func>nofunc

      # ---- from quarterly
      if src == FR_QTR:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_QtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_QtoQ
          if dst == FR_MTH:
              return <freq_conv_func>asfreq_QtoM
          if dst == FR_WK:
              return <freq_conv_func>asfreq_QtoW
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_QtoDT
          return <freq_conv_func>nofunc

      # ---- from monthly
      if src == FR_MTH:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_MtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_MtoQ
          if dst == FR_MTH:
              return <freq_conv_func>no_op
          if dst == FR_WK:
              return <freq_conv_func>asfreq_MtoW
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_MtoDT
          return <freq_conv_func>nofunc

      # ---- from weekly
      if src == FR_WK:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_WtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_WtoQ
          if dst == FR_MTH:
              return <freq_conv_func>asfreq_WtoM
          if dst == FR_WK:
              return <freq_conv_func>asfreq_WtoW
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              return <freq_conv_func>asfreq_WtoDT
          return <freq_conv_func>nofunc

      # ---- from daily or finer
      if src in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
          if dst == FR_ANN:
              return <freq_conv_func>asfreq_DTtoA
          if dst == FR_QTR:
              return <freq_conv_func>asfreq_DTtoQ
          if dst == FR_MTH:
              return <freq_conv_func>asfreq_DTtoM
          if dst == FR_WK:
              return <freq_conv_func>asfreq_DTtoW
          if dst in [FR_DAY, FR_HR, FR_MIN, FR_SEC, FR_MS, FR_US, FR_NS]:
              # a larger group code is a finer resolution
              if src > dst:
                  return <freq_conv_func>downsample_daytime
              return <freq_conv_func>upsample_daytime
          return <freq_conv_func>nofunc

      return <freq_conv_func>nofunc
  247. # --------------------------------------------------------------------
  248. # Frequency Conversion Helpers
  cdef int64_t DtoB_weekday(int64_t unix_date) nogil:
      """
      Map a unix date to a business-day ordinal.

      Callers in this file pass a date that has already been rolled off the
      weekend (see ``DtoB``).
      """
      cdef:
          # shift by 4 days before splitting into whole weeks + remainder
          int64_t shifted = unix_date + 4
          int64_t whole_weeks = shifted // 7
          int64_t day_in_week = shifted % 7

      # five business days per whole week
      return whole_weeks * 5 + day_in_week - 4
  cdef int64_t DtoB(npy_datetimestruct *dts, int roll_back,
                    int64_t unix_date) nogil:
      """
      Convert a calendar day to a business-day ordinal.

      Parameters
      ----------
      dts : npy_datetimestruct*
          Year/month/day of the date; only used to find the day of the week.
      roll_back : int
          1 to move a weekend date back to the preceding Friday, anything
          else to move it forward to the following Monday.
      unix_date : int64_t
          The same date as a day count.

      Returns
      -------
      int64_t
      """
      # calculate the current week (counting from 1970-01-01) treating
      # sunday as last day of a week
      cdef:
          int weekday = dayofweek(dts.year, dts.month, dts.day)

      if weekday > 4:
          # Saturday (5) or Sunday (6)
          if roll_back == 1:
              # change to friday before weekend
              unix_date -= (weekday - 4)
          else:
              # change to Monday after weekend
              unix_date += (7 - weekday)

      return DtoB_weekday(unix_date)
  cdef int64_t upsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil:
      """
      Scale an ordinal up by ``af_info.intraday_conversion_factor``.

      With ``af_info.is_end`` set the last finer-grained ordinal inside the
      period is returned, otherwise the first one.
      """
      cdef:
          int64_t factor = af_info.intraday_conversion_factor

      if not af_info.is_end:
          return ordinal * factor
      # start of the following period, minus one step
      return (ordinal + 1) * factor - 1
  cdef int64_t downsample_daytime(int64_t ordinal, asfreq_info *af_info) nogil:
      """Floor-divide an ordinal by ``af_info.intraday_conversion_factor``."""
      cdef:
          int64_t factor = af_info.intraday_conversion_factor

      return ordinal // factor
  cdef int64_t transform_via_day(int64_t ordinal,
                                 asfreq_info *af_info,
                                 freq_conv_func first_func,
                                 freq_conv_func second_func) nogil:
      """
      Chain two conversion routines.

      ``first_func`` is applied to ``ordinal`` and ``second_func`` to its
      result; both receive the same ``af_info``.
      """
      return second_func(first_func(ordinal, af_info), af_info)
  282. # --------------------------------------------------------------------
  283. # Conversion _to_ Daily Freq
  cdef int64_t asfreq_AtoDT(int64_t ordinal, asfreq_info *af_info) nogil:
      """
      Convert an annual ordinal to a daily-or-finer ordinal.

      For the end of the period the first day of the *following* year is
      located and then stepped back by one day.
      """
      cdef:
          int64_t first_day
          npy_datetimestruct dts

      # is_end is 0 or 1: when set, look at the next annual period
      dts.year = ordinal + af_info.is_end + 1970
      dts.month = 1
      # shift to the first month of a year anchored on ``from_end``
      adjust_dts_for_month(&dts, af_info.from_end)

      first_day = unix_date_from_ymd(dts.year, dts.month, 1) - af_info.is_end
      return upsample_daytime(first_day, af_info)
  cdef int64_t asfreq_QtoDT(int64_t ordinal, asfreq_info *af_info) nogil:
      """
      Convert a quarterly ordinal to a daily-or-finer ordinal.

      For the end of the period the first day of the *following* quarter is
      located and then stepped back by one day.
      """
      cdef:
          int64_t first_day
          int64_t quarter_ordinal = ordinal + af_info.is_end
          npy_datetimestruct dts

      dts.year = quarter_ordinal // 4 + 1970
      # first month of the quarter: 1, 4, 7 or 10
      dts.month = (quarter_ordinal % 4) * 3 + 1
      adjust_dts_for_month(&dts, af_info.from_end)

      first_day = unix_date_from_ymd(dts.year, dts.month, 1) - af_info.is_end
      return upsample_daytime(first_day, af_info)
  cdef int64_t asfreq_MtoDT(int64_t ordinal, asfreq_info *af_info) nogil:
      """
      Convert a monthly ordinal to a daily-or-finer ordinal.

      For the end of the period the first day of the *following* month is
      located and then stepped back by one day.
      """
      cdef:
          int64_t first_day
          int64_t month_ordinal = ordinal + af_info.is_end
          int year, month

      year = month_ordinal // 12 + 1970
      month = month_ordinal % 12 + 1

      first_day = unix_date_from_ymd(year, month, 1) - af_info.is_end
      return upsample_daytime(first_day, af_info)
  cdef int64_t asfreq_WtoDT(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a weekly ordinal to a daily-or-finer ordinal."""
      cdef:
          # last day of the week, given the week-end anchor ``from_end``
          int64_t week_end_day = ordinal * 7 + af_info.from_end - 4
          # 0 when is_end == 1, -6 (back to the week's first day) when 0
          int64_t start_shift = (7 - 1) * (af_info.is_end - 1)

      return upsample_daytime(week_end_day + start_shift, af_info)
  320. # --------------------------------------------------------------------
  321. # Conversion _to_ BusinessDay Freq
  cdef int64_t asfreq_AtoB(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert an annual ordinal to a business-day ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = asfreq_AtoDT(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      # end of period -> roll a weekend back; start of period -> forward
      return DtoB(&dts, af_info.is_end, unix_date)
  cdef int64_t asfreq_QtoB(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a quarterly ordinal to a business-day ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = asfreq_QtoDT(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      # end of period -> roll a weekend back; start of period -> forward
      return DtoB(&dts, af_info.is_end, unix_date)
  cdef int64_t asfreq_MtoB(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a monthly ordinal to a business-day ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = asfreq_MtoDT(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      # end of period -> roll a weekend back; start of period -> forward
      return DtoB(&dts, af_info.is_end, unix_date)
  cdef int64_t asfreq_WtoB(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a weekly ordinal to a business-day ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = asfreq_WtoDT(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      # end of period -> roll a weekend back; start of period -> forward
      return DtoB(&dts, af_info.is_end, unix_date)
  cdef int64_t asfreq_DTtoB(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a daily-or-finer ordinal to a business-day ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = downsample_daytime(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      # This usage defines roll_back the opposite way from the others
      return DtoB(&dts, 1 - af_info.is_end, unix_date)
  363. # ----------------------------------------------------------------------
  364. # Conversion _from_ Daily Freq
  cdef int64_t asfreq_DTtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a daily-or-finer ordinal to an annual ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = downsample_daytime(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      return dts_to_year_ordinal(&dts, af_info.to_end)
  cdef int DtoQ_yq(int64_t ordinal, asfreq_info *af_info, npy_datetimestruct* dts) nogil:
      """
      Find the quarter that a daily ordinal falls in.

      ``dts`` is overwritten with the date of ``ordinal`` and then adjusted
      in place for a year ending in month ``af_info.to_end``.

      Returns
      -------
      int
          Quarter number computed from the adjusted month.
      """
      pandas_datetime_to_datetimestruct(ordinal, NPY_FR_D, dts)
      adjust_dts_for_qtr(dts, af_info.to_end)
      return month_to_quarter(dts.month)
  cdef int64_t asfreq_DTtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a daily-or-finer ordinal to a quarterly ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = downsample_daytime(ordinal, af_info)
          # also fills ``dts`` with the (adjusted) year
          int quarter = DtoQ_yq(unix_date, af_info, &dts)

      return <int64_t>((dts.year - 1970) * 4 + quarter - 1)
  cdef int64_t asfreq_DTtoM(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a daily-or-finer ordinal to a monthly ordinal."""
      cdef:
          npy_datetimestruct dts
          int64_t unix_date = downsample_daytime(ordinal, af_info)

      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, &dts)
      return dts_to_month_ordinal(&dts)
  cdef int64_t asfreq_DTtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a daily-or-finer ordinal to a weekly ordinal."""
      cdef:
          int64_t unix_date = downsample_daytime(ordinal, af_info)

      return unix_date_to_week(unix_date, af_info.to_end)
  cdef int64_t unix_date_to_week(int64_t unix_date, int to_end) nogil:
      """
      Map a unix date to the ordinal of the week containing it.

      ``to_end`` is the week-end anchor of the weekly frequency.
      """
      cdef:
          int64_t shifted = unix_date + 3 - to_end

      return shifted // 7 + 1
  396. # --------------------------------------------------------------------
  397. # Conversion _from_ BusinessDay Freq
  cdef int64_t asfreq_BtoDT(int64_t ordinal, asfreq_info *af_info) nogil:
      """Convert a business-day ordinal to a daily-or-finer ordinal."""
      cdef:
          int64_t shifted = ordinal + 3
          int64_t whole_weeks = shifted // 5
          int64_t day_in_week = shifted % 5

      # five business days correspond to seven calendar days
      return upsample_daytime(whole_weeks * 7 + day_in_week - 3, af_info)
  cdef int64_t asfreq_BtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Business day -> annual, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_BtoDT(ordinal, af_info)

      return asfreq_DTtoA(as_day, af_info)
  cdef int64_t asfreq_BtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Business day -> quarterly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_BtoDT(ordinal, af_info)

      return asfreq_DTtoQ(as_day, af_info)
  cdef int64_t asfreq_BtoM(int64_t ordinal, asfreq_info *af_info) nogil:
      """Business day -> monthly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_BtoDT(ordinal, af_info)

      return asfreq_DTtoM(as_day, af_info)
  cdef int64_t asfreq_BtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Business day -> weekly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_BtoDT(ordinal, af_info)

      return asfreq_DTtoW(as_day, af_info)
  417. # ----------------------------------------------------------------------
  418. # Conversion _from_ Annual Freq
  cdef int64_t asfreq_AtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Annual -> annual (possibly different anchor), via the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_AtoDT(ordinal, af_info)

      return asfreq_DTtoA(as_day, af_info)
  cdef int64_t asfreq_AtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Annual -> quarterly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_AtoDT(ordinal, af_info)

      return asfreq_DTtoQ(as_day, af_info)
  cdef int64_t asfreq_AtoM(int64_t ordinal, asfreq_info *af_info) nogil:
      """Annual -> monthly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_AtoDT(ordinal, af_info)

      return asfreq_DTtoM(as_day, af_info)
  cdef int64_t asfreq_AtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Annual -> weekly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_AtoDT(ordinal, af_info)

      return asfreq_DTtoW(as_day, af_info)
  435. # ----------------------------------------------------------------------
  436. # Conversion _from_ Quarterly Freq
  cdef int64_t asfreq_QtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Quarterly -> quarterly (possibly different anchor), via the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_QtoDT(ordinal, af_info)

      return asfreq_DTtoQ(as_day, af_info)
  cdef int64_t asfreq_QtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Quarterly -> annual, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_QtoDT(ordinal, af_info)

      return asfreq_DTtoA(as_day, af_info)
  cdef int64_t asfreq_QtoM(int64_t ordinal, asfreq_info *af_info) nogil:
      """Quarterly -> monthly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_QtoDT(ordinal, af_info)

      return asfreq_DTtoM(as_day, af_info)
  cdef int64_t asfreq_QtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Quarterly -> weekly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_QtoDT(ordinal, af_info)

      return asfreq_DTtoW(as_day, af_info)
  453. # ----------------------------------------------------------------------
  454. # Conversion _from_ Monthly Freq
  cdef int64_t asfreq_MtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Monthly -> annual, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_MtoDT(ordinal, af_info)

      return asfreq_DTtoA(as_day, af_info)
  cdef int64_t asfreq_MtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Monthly -> quarterly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_MtoDT(ordinal, af_info)

      return asfreq_DTtoQ(as_day, af_info)
  cdef int64_t asfreq_MtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Monthly -> weekly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_MtoDT(ordinal, af_info)

      return asfreq_DTtoW(as_day, af_info)
  467. # ----------------------------------------------------------------------
  468. # Conversion _from_ Weekly Freq
  cdef int64_t asfreq_WtoA(int64_t ordinal, asfreq_info *af_info) nogil:
      """Weekly -> annual, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_WtoDT(ordinal, af_info)

      return asfreq_DTtoA(as_day, af_info)
  cdef int64_t asfreq_WtoQ(int64_t ordinal, asfreq_info *af_info) nogil:
      """Weekly -> quarterly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_WtoDT(ordinal, af_info)

      return asfreq_DTtoQ(as_day, af_info)
  cdef int64_t asfreq_WtoM(int64_t ordinal, asfreq_info *af_info) nogil:
      """Weekly -> monthly, by way of the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_WtoDT(ordinal, af_info)

      return asfreq_DTtoM(as_day, af_info)
  cdef int64_t asfreq_WtoW(int64_t ordinal, asfreq_info *af_info) nogil:
      """Weekly -> weekly (possibly different anchor), via the daily ordinal."""
      cdef:
          int64_t as_day = asfreq_WtoDT(ordinal, af_info)

      return asfreq_DTtoW(as_day, af_info)
  485. # ----------------------------------------------------------------------
  @cython.cdivision
  cdef char* c_strftime(npy_datetimestruct *dts, char *fmt):
      """
      Generate a nice string representation of the period
      object, originally from DateObject_strftime

      Parameters
      ----------
      dts : npy_datetimestruct*
      fmt : char*

      Returns
      -------
      result : char*
          Buffer obtained from ``malloc`` holding the formatted,
          NUL-terminated text.  NULL if the allocation failed.  An empty
          string if ``strftime`` produced no output, which includes the case
          where the result did not fit into ``strlen(fmt) + 50`` bytes.
          NOTE(review): the buffer is not released here; presumably the
          caller is responsible for ``free`` -- confirm at the call sites.
      """
      cdef:
          tm c_date
          char *result
          int result_len = strlen(fmt) + 50

      # Start from an all-zero struct so that any member of the C library's
      # ``struct tm`` that is not assigned below (some platforms carry extra
      # fields such as a time-zone pointer) never holds stack garbage.
      memset(&c_date, 0, sizeof(tm))

      c_date.tm_sec = dts.sec
      c_date.tm_min = dts.min
      c_date.tm_hour = dts.hour
      c_date.tm_mday = dts.day
      c_date.tm_mon = dts.month - 1
      c_date.tm_year = dts.year - 1900
      # shift the weekday numbering by one so that the file's Sunday (6)
      # becomes 0, as ``struct tm`` expects
      c_date.tm_wday = (dayofweek(dts.year, dts.month, dts.day) + 1) % 7
      c_date.tm_yday = get_day_of_year(dts.year, dts.month, dts.day) - 1
      c_date.tm_isdst = -1

      result = <char*>malloc(result_len * sizeof(char))
      if result == NULL:
          # do not hand a NULL buffer to strftime
          return NULL

      if strftime(result, result_len, fmt, &c_date) == 0:
          # On a zero return the buffer contents are unspecified; make sure
          # the caller still receives a valid (empty) C string.
          result[0] = 0

      return result
  515. # ----------------------------------------------------------------------
  516. # Conversion between date_info and npy_datetimestruct
  cdef int get_freq_group(int freq) nogil:
      """Round a frequency code down to a multiple of 1000 (its group code)."""
      # See also FreqGroup.get_freq_group
      cdef:
          int thousands = freq // 1000

      return thousands * 1000
  cdef int get_freq_group_index(int freq) nogil:
      """Return the frequency code floor-divided by 1000."""
      cdef:
          int index = freq // 1000

      return index
  cdef void adjust_dts_for_month(npy_datetimestruct* dts, int from_end) nogil:
      """
      Shift ``dts.month`` / ``dts.year`` in place for a year that ends in
      month ``from_end``.  Nothing changes when ``from_end`` is 12.
      """
      if from_end == 12:
          return

      dts.month += from_end
      if dts.month > 12:
          # wrapped past December: stay in the same year
          dts.month -= 12
      else:
          dts.year -= 1
  cdef void adjust_dts_for_qtr(npy_datetimestruct* dts, int to_end) nogil:
      """
      Shift ``dts.month`` / ``dts.year`` in place for a year that ends in
      month ``to_end``.  Nothing changes when ``to_end`` is 12.
      """
      if to_end == 12:
          return

      dts.month -= to_end
      if dts.month <= 0:
          # wrapped before January: stay in the same year
          dts.month += 12
      else:
          dts.year += 1
  cdef int64_t unix_date_from_ymd(int year, int month, int day) nogil:
      """
      Find the unix_date (days elapsed since datetime(1970, 1, 1)) for the
      given year/month/day.

      Assumes the Gregorian calendar.
      """
      cdef:
          npy_datetimestruct dts

      # zero every field first; only the date part is filled in
      memset(&dts, 0, sizeof(npy_datetimestruct))
      dts.year = year
      dts.month = month
      dts.day = day
      return npy_datetimestruct_to_datetime(NPY_FR_D, &dts)
  cdef int64_t dts_to_month_ordinal(npy_datetimestruct* dts) nogil:
      """Return the number of months between 1970-01 and ``dts``."""
      # AKA: use npy_datetimestruct_to_datetime(NPY_FR_M, &dts)
      cdef:
          int64_t years_since_epoch = dts.year - 1970

      return <int64_t>(years_since_epoch * 12 + dts.month - 1)
  cdef int64_t dts_to_year_ordinal(npy_datetimestruct *dts, int to_end) nogil:
      """
      Return the annual ordinal of ``dts`` for a year ending in month
      ``to_end``.
      """
      cdef:
          int64_t calendar_year = npy_datetimestruct_to_datetime(
              NPY_DATETIMEUNIT.NPY_FR_Y, dts
          )

      if dts.month <= to_end:
          return calendar_year
      # months after the anchor already belong to the next period
      return calendar_year + 1
  cdef int64_t dts_to_qtr_ordinal(npy_datetimestruct* dts, int to_end) nogil:
      """
      Return the quarterly ordinal of ``dts`` for a year ending in month
      ``to_end``.

      Notes
      -----
      ``dts`` is modified in place by ``adjust_dts_for_qtr``.
      """
      cdef:
          int64_t years_since_epoch

      adjust_dts_for_qtr(dts, to_end)
      years_since_epoch = dts.year - 1970
      return <int64_t>(years_since_epoch * 4 + month_to_quarter(dts.month) - 1)
  cdef int get_anchor_month(int freq, int freq_group) nogil:
      """
      Return the anchor month encoded in ``freq``: the offset of ``freq``
      from its group code, with an offset of 0 meaning 12.
      """
      cdef:
          int offset = freq - freq_group

      return offset if offset != 0 else 12
  # Deliberately do _not_ enable cdivision here, or else ordinals near -1 are
  # assigned to incorrect dates GH#19643
  @cython.cdivision(False)
  cdef int64_t get_period_ordinal(npy_datetimestruct *dts, int freq) nogil:
      """
      Generate an ordinal in period space

      Parameters
      ----------
      dts : npy_datetimestruct*
          May be adjusted in place when ``freq`` is a quarterly frequency.
      freq : int

      Returns
      -------
      period_ordinal : int64_t
      """
      cdef:
          int64_t unix_date
          int group = get_freq_group(freq)

      if group == FR_ANN:
          return dts_to_year_ordinal(dts, get_anchor_month(freq, group))

      if group == FR_QTR:
          return dts_to_qtr_ordinal(dts, get_anchor_month(freq, group))

      if group == FR_WK:
          unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts)
          # freq - FR_WK is the week-end anchor
          return unix_date_to_week(unix_date, freq - FR_WK)

      if freq == FR_BUS:
          unix_date = npy_datetimestruct_to_datetime(NPY_FR_D, dts)
          # roll_back=0: a weekend date moves forward to Monday
          return DtoB(dts, 0, unix_date)

      # Everything else is converted through the matching datetime unit.
      return npy_datetimestruct_to_datetime(freq_group_code_to_npy_unit(freq), dts)
  cdef void get_date_info(int64_t ordinal, int freq, npy_datetimestruct *dts) nogil:
      """
      Fill ``dts`` with the date and time-of-day of a period ordinal.

      The date fields come from the period's unix date; the hour, minute,
      second, ``us`` and ``ps`` fields come from the nanoseconds after
      midnight reported by ``get_time_nanos``.
      """
      cdef:
          npy_datetimestruct time_part
          int64_t unix_date = get_unix_date(ordinal, freq)
          int64_t nanos = get_time_nanos(freq, unix_date, ordinal)

      # date part
      pandas_datetime_to_datetimestruct(unix_date, NPY_FR_D, dts)

      # time-of-day part, decoded separately and then copied over
      pandas_datetime_to_datetimestruct(nanos, NPY_DATETIMEUNIT.NPY_FR_ns, &time_part)
      dts.hour = time_part.hour
      dts.min = time_part.min
      dts.sec = time_part.sec
      dts.us = time_part.us
      dts.ps = time_part.ps
  cdef int64_t get_unix_date(int64_t period_ordinal, int freq) nogil:
      """
      Returns the proleptic Gregorian ordinal of the date, as an integer.
      This corresponds to the number of days since Jan., 1st, 1970 AD.
      When the instance has a frequency less than daily, the proleptic date
      is calculated for the last day of the period.

      Parameters
      ----------
      period_ordinal : int64_t
      freq : int

      Returns
      -------
      unix_date : int64_t number of days since datetime(1970, 1, 1)
      """
      cdef:
          asfreq_info info
          freq_conv_func to_daily = NULL

      if freq == FR_DAY:
          # daily ordinals already are unix dates
          return period_ordinal

      to_daily = get_asfreq_func(freq, FR_DAY)
      # is_end=True: convert to the end of the period
      get_asfreq_info(freq, FR_DAY, True, &info)
      return to_daily(period_ordinal, &info)
  @cython.cdivision
  cdef int64_t get_time_nanos(int freq, int64_t unix_date, int64_t ordinal) nogil:
      """
      Find the number of nanoseconds after midnight on the given unix_date
      that the ordinal represents in the given frequency.

      Parameters
      ----------
      freq : int
      unix_date : int64_t
      ordinal : int64_t

      Returns
      -------
      int64_t
          Always 0 for daily and coarser frequencies.
      """
      cdef:
          int64_t factor, periods_per_day, periods_after_midnight
          int64_t nanos_in_day = 24 * 3600 * 10**9
          int group = get_freq_group(freq)

      if group <= FR_DAY:
          return 0

      # ``factor`` is the length of one period in nanoseconds
      if group == FR_NS:
          factor = 1
      elif group == FR_US:
          factor = 10**3
      elif group == FR_MS:
          factor = 10**6
      elif group == FR_SEC:
          factor = 10**9
      elif group == FR_MIN:
          factor = 10**9 * 60
      else:
          # We must have group == FR_HR
          factor = 10**9 * 3600

      periods_per_day = nanos_in_day / factor
      periods_after_midnight = ordinal - unix_date * periods_per_day
      return periods_after_midnight * factor
  cdef int get_yq(int64_t ordinal, int freq, npy_datetimestruct* dts):
      """
      Find the year and quarter of a Period with the given ordinal and frequency

      Parameters
      ----------
      ordinal : int64_t
      freq : int
      dts : *npy_datetimestruct

      Returns
      -------
      quarter : int
          describes the implied quarterly frequency associated with `freq`

      Notes
      -----
      Sets dts.year in-place.
      """
      cdef:
          asfreq_info info
          int64_t unix_date = get_unix_date(ordinal, freq)
          # keep the anchor of a quarterly freq, otherwise use plain FR_QTR
          int qtr_freq = freq if get_freq_group(freq) == FR_QTR else FR_QTR

      get_asfreq_info(FR_DAY, qtr_freq, True, &info)
      return DtoQ_yq(unix_date, &info, dts)
  cdef int month_to_quarter(int month) nogil:
      """Map a month number to its quarter: 1-3 -> 1, 4-6 -> 2, and so on."""
      cdef:
          int zero_based_month = month - 1

      return zero_based_month // 3 + 1
  708. # ----------------------------------------------------------------------
  709. # Period logic
  @cython.wraparound(False)
  @cython.boundscheck(False)
  def periodarr_to_dt64arr(const int64_t[:] periodarr, int freq):
      """
      Convert array to datetime64 values from a set of ordinals corresponding to
      periods per period convention.

      Parameters
      ----------
      periodarr : const int64_t[:]
          Period ordinals.
      freq : int
          Frequency code of the ordinals.

      Returns
      -------
      np.ndarray

      Raises
      ------
      ValueError
          If ``freq`` is 6000 or above but is not one of the daily-or-finer
          frequency codes handled here.
      """
      cdef:
          int64_t[::1] out
          Py_ssize_t i, N

      if freq < 6000:  # i.e. FR_DAY, hard-code to avoid need to cast
          N = len(periodarr)
          out = np.empty(N, dtype="i8")

          # We get here with freqs that do not correspond to a datetime64 unit
          for i in range(N):
              out[i] = period_ordinal_to_dt64(periodarr[i], freq)

          return out.base  # .base to access underlying np.ndarray

      # Short-circuit for performance
      if freq == FR_NS:
          # TODO: copy?
          return periodarr.base

      if freq == FR_US:
          dta = periodarr.base.view("M8[us]")
      elif freq == FR_MS:
          dta = periodarr.base.view("M8[ms]")
      elif freq == FR_SEC:
          dta = periodarr.base.view("M8[s]")
      elif freq == FR_MIN:
          dta = periodarr.base.view("M8[m]")
      elif freq == FR_HR:
          dta = periodarr.base.view("M8[h]")
      elif freq == FR_DAY:
          dta = periodarr.base.view("M8[D]")
      else:
          # Without this branch ``dta`` would be unbound below and the caller
          # would only see an opaque UnboundLocalError.
          raise ValueError(f"Unsupported period frequency code: {freq}")

      return astype_overflowsafe(dta, dtype=DT64NS_DTYPE)
  cdef void get_asfreq_info(int from_freq, int to_freq,
                            bint is_end, asfreq_info *af_info) nogil:
      """
      Construct the `asfreq_info` object used to convert an ordinal from
      `from_freq` to `to_freq`.

      Parameters
      ----------
      from_freq : int
      to_freq int
      is_end : bool
      af_info : *asfreq_info
          Filled in place.  ``from_end`` / ``to_end`` are only assigned when
          the respective frequency is weekly, annual or quarterly.
      """
      cdef:
          int src_group = get_freq_group(from_freq)
          int dst_group = get_freq_group(to_freq)
          # anything coarser than daily is treated as daily for the factor
          int src_index = get_freq_group_index(max_value(src_group, FR_DAY))
          int dst_index = get_freq_group_index(max_value(dst_group, FR_DAY))

      af_info.is_end = is_end
      af_info.intraday_conversion_factor = get_daytime_conversion_factor(
          src_index, dst_index
      )

      # anchor of the source frequency
      if src_group == FR_WK:
          af_info.from_end = calc_week_end(from_freq, src_group)
      elif src_group == FR_ANN or src_group == FR_QTR:
          af_info.from_end = calc_a_year_end(from_freq, src_group)

      # anchor of the target frequency
      if dst_group == FR_WK:
          af_info.to_end = calc_week_end(to_freq, dst_group)
      elif dst_group == FR_ANN or dst_group == FR_QTR:
          af_info.to_end = calc_a_year_end(to_freq, dst_group)
  @cython.cdivision
  cdef int calc_a_year_end(int freq, int group) nogil:
      """
      Return the year-end month encoded in ``freq``: its offset from
      ``group`` modulo 12, with 0 meaning 12.
      """
      cdef:
          int month = (freq - group) % 12

      return month if month != 0 else 12
  cdef int calc_week_end(int freq, int group) nogil:
      """Return the week-end anchor: the offset of ``freq`` from ``group``."""
      cdef:
          int anchor = freq - group

      return anchor
  cpdef int64_t period_asfreq(int64_t ordinal, int freq1, int freq2, bint end):
      """
      Convert period ordinal from one frequency to another, and if upsampling,
      choose to use start ('S') or end ('E') of period.
      """
      cdef:
          int64_t converted

      # Scalar case of the array routine: one input, one output slot.
      _period_asfreq(&ordinal, &converted, 1, freq1, freq2, end)
      return converted
  @cython.wraparound(False)
  @cython.boundscheck(False)
  def period_asfreq_arr(ndarray[int64_t] arr, int freq1, int freq2, bint end):
      """
      Convert int64-array of period ordinals from one frequency to another, and
      if upsampling, choose to use start ('S') or end ('E') of period.
      """
      cdef:
          Py_ssize_t count = len(arr)
          # distance between consecutive inputs, in 8-byte items
          Py_ssize_t step = arr.strides[0] // 8
          ndarray[int64_t] converted = cnp.PyArray_EMPTY(
              arr.ndim, arr.shape, cnp.NPY_INT64, 0
          )

      _period_asfreq(
          <int64_t*>cnp.PyArray_DATA(arr),
          <int64_t*>cnp.PyArray_DATA(converted),
          count,
          freq1,
          freq2,
          end,
          step,
      )
      return converted
  @cython.wraparound(False)
  @cython.boundscheck(False)
  cdef void _period_asfreq(
      int64_t* ordinals,
      int64_t* out,
      Py_ssize_t length,
      int freq1,
      int freq2,
      bint end,
      Py_ssize_t increment=1,
  ):
      """See period_asfreq.__doc__"""
      cdef:
          Py_ssize_t idx
          freq_conv_func convert
          asfreq_info info
          int64_t ordinal

      if length == 1 and ordinals[0] == NPY_NAT:
          # fastpath avoid calling get_asfreq_func
          out[0] = NPY_NAT
          return

      convert = get_asfreq_func(freq1, freq2)
      get_asfreq_info(freq1, freq2, end, &info)

      for idx in range(length):
          # inputs are ``increment`` items apart, outputs are contiguous
          ordinal = ordinals[idx * increment]
          if ordinal == NPY_NAT:
              # NaT passes through unconverted
              out[idx] = ordinal
          else:
              out[idx] = convert(ordinal, &info)
  846. cpdef int64_t period_ordinal(int y, int m, int d, int h, int min,
  847. int s, int us, int ps, int freq):
  848. """
  849. Find the ordinal representation of the given datetime components at the
  850. frequency `freq`.
  851. Parameters
  852. ----------
  853. y : int
  854. m : int
  855. d : int
  856. h : int
  857. min : int
  858. s : int
  859. us : int
  860. ps : int
  861. Returns
  862. -------
  863. ordinal : int64_t
  864. """
  865. cdef:
  866. npy_datetimestruct dts
  867. dts.year = y
  868. dts.month = m
  869. dts.day = d
  870. dts.hour = h
  871. dts.min = min
  872. dts.sec = s
  873. dts.us = us
  874. dts.ps = ps
  875. return get_period_ordinal(&dts, freq)
  876. cdef int64_t period_ordinal_to_dt64(int64_t ordinal, int freq) except? -1:
  877. cdef:
  878. npy_datetimestruct dts
  879. if ordinal == NPY_NAT:
  880. return NPY_NAT
  881. get_date_info(ordinal, freq, &dts)
  882. check_dts_bounds(&dts)
  883. return npy_datetimestruct_to_datetime(NPY_DATETIMEUNIT.NPY_FR_ns, &dts)
  884. cdef str period_format(int64_t value, int freq, object fmt=None):
  885. cdef:
  886. int freq_group
  887. if value == NPY_NAT:
  888. return "NaT"
  889. if isinstance(fmt, str):
  890. # Encode using current locale, in case fmt contains non-utf8 chars
  891. fmt = <bytes>util.string_encode_locale(fmt)
  892. if fmt is None:
  893. freq_group = get_freq_group(freq)
  894. if freq_group == FR_ANN:
  895. fmt = b"%Y"
  896. elif freq_group == FR_QTR:
  897. fmt = b"%FQ%q"
  898. elif freq_group == FR_MTH:
  899. fmt = b"%Y-%m"
  900. elif freq_group == FR_WK:
  901. left = period_asfreq(value, freq, FR_DAY, 0)
  902. right = period_asfreq(value, freq, FR_DAY, 1)
  903. return f"{period_format(left, FR_DAY)}/{period_format(right, FR_DAY)}"
  904. elif freq_group == FR_BUS or freq_group == FR_DAY:
  905. fmt = b"%Y-%m-%d"
  906. elif freq_group == FR_HR:
  907. fmt = b"%Y-%m-%d %H:00"
  908. elif freq_group == FR_MIN:
  909. fmt = b"%Y-%m-%d %H:%M"
  910. elif freq_group == FR_SEC:
  911. fmt = b"%Y-%m-%d %H:%M:%S"
  912. elif freq_group == FR_MS:
  913. fmt = b"%Y-%m-%d %H:%M:%S.%l"
  914. elif freq_group == FR_US:
  915. fmt = b"%Y-%m-%d %H:%M:%S.%u"
  916. elif freq_group == FR_NS:
  917. fmt = b"%Y-%m-%d %H:%M:%S.%n"
  918. else:
  919. raise ValueError(f"Unknown freq: {freq}")
  920. return _period_strftime(value, freq, fmt)
# Period-specific directives (not handled by c_strftime), each paired with the
# bytes placeholder that _period_strftime swaps in before calling c_strftime.
# Order matters: _period_strftime dispatches on the index into this list.
cdef list extra_fmts = [(b"%q", b"^`AB`^"),
                        (b"%f", b"^`CD`^"),
                        (b"%F", b"^`EF`^"),
                        (b"%l", b"^`GH`^"),
                        (b"%u", b"^`IJ`^"),
                        (b"%n", b"^`KL`^")]

# The same placeholders as str, in the same order as `extra_fmts`; used to
# locate the placeholders in the decoded c_strftime output.
cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^",
                            "^`GH`^", "^`IJ`^", "^`KL`^"]
  929. cdef str _period_strftime(int64_t value, int freq, bytes fmt):
  930. cdef:
  931. Py_ssize_t i
  932. npy_datetimestruct dts
  933. char *formatted
  934. bytes pat, brepl
  935. list found_pat = [False] * len(extra_fmts)
  936. int quarter
  937. int32_t us, ps
  938. str result, repl
  939. get_date_info(value, freq, &dts)
  940. # Find our additional directives in the pattern and replace them with
  941. # placeholders that are not processed by c_strftime
  942. for i in range(len(extra_fmts)):
  943. pat = extra_fmts[i][0]
  944. brepl = extra_fmts[i][1]
  945. if pat in fmt:
  946. fmt = fmt.replace(pat, brepl)
  947. found_pat[i] = True
  948. # Execute c_strftime to process the usual datetime directives
  949. formatted = c_strftime(&dts, <char*>fmt)
  950. # Decode result according to current locale
  951. result = util.char_to_string_locale(formatted)
  952. free(formatted)
  953. # Now we will fill the placeholders corresponding to our additional directives
  954. # First prepare the contents
  955. # Save these to local vars as dts can be modified by get_yq below
  956. us = dts.us
  957. ps = dts.ps
  958. if any(found_pat[0:3]):
  959. # Note: this modifies `dts` in-place so that year becomes fiscal year
  960. # However it looses the us and ps
  961. quarter = get_yq(value, freq, &dts)
  962. else:
  963. quarter = 0
  964. # Now do the filling per se
  965. for i in range(len(extra_fmts)):
  966. if found_pat[i]:
  967. if i == 0: # %q, 1-digit quarter.
  968. repl = f"{quarter}"
  969. elif i == 1: # %f, 2-digit 'Fiscal' year
  970. repl = f"{(dts.year % 100):02d}"
  971. elif i == 2: # %F, 'Fiscal' year with a century
  972. repl = str(dts.year)
  973. elif i == 3: # %l, milliseconds
  974. repl = f"{(us // 1_000):03d}"
  975. elif i == 4: # %u, microseconds
  976. repl = f"{(us):06d}"
  977. elif i == 5: # %n, nanoseconds
  978. repl = f"{((us * 1000) + (ps // 1000)):09d}"
  979. result = result.replace(str_extra_fmts[i], repl)
  980. return result
# ----------------------------------------------------------------------
# period accessors

# Function-pointer type that the p* accessors below are cast to; it is what
# _get_accessor_func returns and get_period_field_arr calls per element.
ctypedef int (*accessor)(int64_t ordinal, int freq) except INT32_MIN
  984. cdef int pyear(int64_t ordinal, int freq):
  985. cdef:
  986. npy_datetimestruct dts
  987. get_date_info(ordinal, freq, &dts)
  988. return dts.year
  989. cdef int pqyear(int64_t ordinal, int freq):
  990. cdef:
  991. npy_datetimestruct dts
  992. get_yq(ordinal, freq, &dts)
  993. return dts.year
  994. cdef int pquarter(int64_t ordinal, int freq):
  995. cdef:
  996. int quarter
  997. npy_datetimestruct dts
  998. quarter = get_yq(ordinal, freq, &dts)
  999. return quarter
  1000. cdef int pmonth(int64_t ordinal, int freq):
  1001. cdef:
  1002. npy_datetimestruct dts
  1003. get_date_info(ordinal, freq, &dts)
  1004. return dts.month
  1005. cdef int pday(int64_t ordinal, int freq):
  1006. cdef:
  1007. npy_datetimestruct dts
  1008. get_date_info(ordinal, freq, &dts)
  1009. return dts.day
  1010. cdef int pweekday(int64_t ordinal, int freq):
  1011. cdef:
  1012. npy_datetimestruct dts
  1013. get_date_info(ordinal, freq, &dts)
  1014. return dayofweek(dts.year, dts.month, dts.day)
  1015. cdef int pday_of_year(int64_t ordinal, int freq):
  1016. cdef:
  1017. npy_datetimestruct dts
  1018. get_date_info(ordinal, freq, &dts)
  1019. return get_day_of_year(dts.year, dts.month, dts.day)
  1020. cdef int pweek(int64_t ordinal, int freq):
  1021. cdef:
  1022. npy_datetimestruct dts
  1023. get_date_info(ordinal, freq, &dts)
  1024. return get_week_of_year(dts.year, dts.month, dts.day)
  1025. cdef int phour(int64_t ordinal, int freq):
  1026. cdef:
  1027. npy_datetimestruct dts
  1028. get_date_info(ordinal, freq, &dts)
  1029. return dts.hour
  1030. cdef int pminute(int64_t ordinal, int freq):
  1031. cdef:
  1032. npy_datetimestruct dts
  1033. get_date_info(ordinal, freq, &dts)
  1034. return dts.min
  1035. cdef int psecond(int64_t ordinal, int freq):
  1036. cdef:
  1037. npy_datetimestruct dts
  1038. get_date_info(ordinal, freq, &dts)
  1039. return <int>dts.sec
  1040. cdef int pdays_in_month(int64_t ordinal, int freq):
  1041. cdef:
  1042. npy_datetimestruct dts
  1043. get_date_info(ordinal, freq, &dts)
  1044. return get_days_in_month(dts.year, dts.month)
  1045. @cython.wraparound(False)
  1046. @cython.boundscheck(False)
  1047. def get_period_field_arr(str field, const int64_t[:] arr, int freq):
  1048. cdef:
  1049. Py_ssize_t i, sz
  1050. int64_t[::1] out
  1051. func = _get_accessor_func(field)
  1052. if func is NULL:
  1053. raise ValueError(f"Unrecognized field name: {field}")
  1054. sz = len(arr)
  1055. out = np.empty(sz, dtype=np.int64)
  1056. for i in range(sz):
  1057. if arr[i] == NPY_NAT:
  1058. out[i] = -1
  1059. continue
  1060. out[i] = func(arr[i], freq)
  1061. return out.base # .base to access underlying np.ndarray
  1062. cdef accessor _get_accessor_func(str field):
  1063. if field == "year":
  1064. return <accessor>pyear
  1065. elif field == "qyear":
  1066. return <accessor>pqyear
  1067. elif field == "quarter":
  1068. return <accessor>pquarter
  1069. elif field == "month":
  1070. return <accessor>pmonth
  1071. elif field == "day":
  1072. return <accessor>pday
  1073. elif field == "hour":
  1074. return <accessor>phour
  1075. elif field == "minute":
  1076. return <accessor>pminute
  1077. elif field == "second":
  1078. return <accessor>psecond
  1079. elif field == "week":
  1080. return <accessor>pweek
  1081. elif field == "day_of_year":
  1082. return <accessor>pday_of_year
  1083. elif field == "weekday" or field == "day_of_week":
  1084. return <accessor>pweekday
  1085. elif field == "days_in_month":
  1086. return <accessor>pdays_in_month
  1087. return NULL
  1088. @cython.wraparound(False)
  1089. @cython.boundscheck(False)
  1090. def from_ordinals(const int64_t[:] values, freq):
  1091. cdef:
  1092. Py_ssize_t i, n = len(values)
  1093. int64_t[::1] result = np.empty(len(values), dtype="i8")
  1094. int64_t val
  1095. freq = to_offset(freq)
  1096. if not isinstance(freq, BaseOffset):
  1097. raise ValueError("freq not specified and cannot be inferred")
  1098. for i in range(n):
  1099. val = values[i]
  1100. if val == NPY_NAT:
  1101. result[i] = NPY_NAT
  1102. else:
  1103. result[i] = Period(val, freq=freq).ordinal
  1104. return result.base
@cython.wraparound(False)
@cython.boundscheck(False)
def extract_ordinals(ndarray values, freq) -> np.ndarray:
    """
    Extract int64 period ordinals from an object-dtype array.

    Each element is converted by _extract_ordinal; the result has the same
    shape as `values`.

    Raises
    ------
    TypeError
        If `values` is not object-dtype.
    """
    # values is object-dtype, may be 2D
    cdef:
        Py_ssize_t i, n = values.size
        int64_t ordinal
        ndarray ordinals = cnp.PyArray_EMPTY(
            values.ndim, values.shape, cnp.NPY_INT64, 0
        )
        # Iterates `ordinals` (index 0) and `values` (index 1) in lockstep
        cnp.broadcast mi = cnp.PyArray_MultiIterNew2(ordinals, values)
        object p

    if values.descr.type_num != cnp.NPY_OBJECT:
        # if we don't raise here, we'll segfault later!
        raise TypeError("extract_ordinals values must be object-dtype")

    # Computed once so each element only needs a string comparison
    freqstr = Period._maybe_convert_freq(freq).freqstr

    for i in range(n):
        # Analogous to: p = values[i]
        p = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]

        ordinal = _extract_ordinal(p, freqstr, freq)

        # Analogous to: ordinals[i] = ordinal
        (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ordinal

        cnp.PyArray_MultiIter_NEXT(mi)

    return ordinals
  1129. cdef int64_t _extract_ordinal(object item, str freqstr, freq) except? -1:
  1130. """
  1131. See extract_ordinals.
  1132. """
  1133. cdef:
  1134. int64_t ordinal
  1135. if checknull_with_nat(item) or item is C_NA:
  1136. ordinal = NPY_NAT
  1137. elif util.is_integer_object(item):
  1138. if item == NPY_NAT:
  1139. ordinal = NPY_NAT
  1140. else:
  1141. raise TypeError(item)
  1142. else:
  1143. try:
  1144. ordinal = item.ordinal
  1145. if item.freqstr != freqstr:
  1146. msg = DIFFERENT_FREQ.format(cls="PeriodIndex",
  1147. own_freq=freqstr,
  1148. other_freq=item.freqstr)
  1149. raise IncompatibleFrequency(msg)
  1150. except AttributeError:
  1151. item = Period(item, freq=freq)
  1152. if item is NaT:
  1153. # input may contain NaT-like string
  1154. ordinal = NPY_NAT
  1155. else:
  1156. ordinal = item.ordinal
  1157. return ordinal
  1158. def extract_freq(ndarray[object] values) -> BaseOffset:
  1159. # TODO: Change type to const object[:] when Cython supports that.
  1160. cdef:
  1161. Py_ssize_t i, n = len(values)
  1162. object value
  1163. for i in range(n):
  1164. value = values[i]
  1165. if is_period_object(value):
  1166. return value.freq
  1167. raise ValueError("freq not specified and cannot be inferred")
# -----------------------------------------------------------------------
# period helpers

# Message template for IncompatibleFrequency errors; filled in via
# str.format with the keyword arguments `other_freq`, `cls` and `own_freq`.
DIFFERENT_FREQ = ("Input has different freq={other_freq} "
                  "from {cls}(freq={own_freq})")
  1172. class IncompatibleFrequency(ValueError):
  1173. pass
  1174. cdef class PeriodMixin:
  1175. # Methods shared between Period and PeriodArray
  1176. @property
  1177. def start_time(self) -> Timestamp:
  1178. """
  1179. Get the Timestamp for the start of the period.
  1180. Returns
  1181. -------
  1182. Timestamp
  1183. See Also
  1184. --------
  1185. Period.end_time : Return the end Timestamp.
  1186. Period.dayofyear : Return the day of year.
  1187. Period.daysinmonth : Return the days in that month.
  1188. Period.dayofweek : Return the day of the week.
  1189. Examples
  1190. --------
  1191. >>> period = pd.Period('2012-1-1', freq='D')
  1192. >>> period
  1193. Period('2012-01-01', 'D')
  1194. >>> period.start_time
  1195. Timestamp('2012-01-01 00:00:00')
  1196. >>> period.end_time
  1197. Timestamp('2012-01-01 23:59:59.999999999')
  1198. """
  1199. return self.to_timestamp(how="start")
  1200. @property
  1201. def end_time(self) -> Timestamp:
  1202. """
  1203. Get the Timestamp for the end of the period.
  1204. Returns
  1205. -------
  1206. Timestamp
  1207. See Also
  1208. --------
  1209. Period.start_time : Return the start Timestamp.
  1210. Period.dayofyear : Return the day of year.
  1211. Period.daysinmonth : Return the days in that month.
  1212. Period.dayofweek : Return the day of the week.
  1213. """
  1214. return self.to_timestamp(how="end")
  1215. def _require_matching_freq(self, other, base=False):
  1216. # See also arrays.period.raise_on_incompatible
  1217. if is_offset_object(other):
  1218. other_freq = other
  1219. else:
  1220. other_freq = other.freq
  1221. if base:
  1222. condition = self.freq.base != other_freq.base
  1223. else:
  1224. condition = self.freq != other_freq
  1225. if condition:
  1226. msg = DIFFERENT_FREQ.format(
  1227. cls=type(self).__name__,
  1228. own_freq=self.freqstr,
  1229. other_freq=other_freq.freqstr,
  1230. )
  1231. raise IncompatibleFrequency(msg)
  1232. cdef class _Period(PeriodMixin):
  1233. cdef readonly:
  1234. int64_t ordinal
  1235. PeriodDtypeBase _dtype
  1236. BaseOffset freq
  1237. # higher than np.ndarray, np.matrix, np.timedelta64
  1238. __array_priority__ = 100
  1239. dayofweek = _Period.day_of_week
  1240. dayofyear = _Period.day_of_year
  1241. def __cinit__(self, int64_t ordinal, BaseOffset freq):
  1242. self.ordinal = ordinal
  1243. self.freq = freq
  1244. # Note: this is more performant than PeriodDtype.from_date_offset(freq)
  1245. # because from_date_offset cannot be made a cdef method (until cython
  1246. # supported cdef classmethods)
  1247. self._dtype = PeriodDtypeBase(freq._period_dtype_code)
  1248. @classmethod
  1249. def _maybe_convert_freq(cls, object freq) -> BaseOffset:
  1250. """
  1251. Internally we allow integer and tuple representations (for now) that
  1252. are not recognized by to_offset, so we convert them here. Also, a
  1253. Period's freq attribute must have `freq.n > 0`, which we check for here.
  1254. Returns
  1255. -------
  1256. DateOffset
  1257. """
  1258. if isinstance(freq, int):
  1259. # We already have a dtype code
  1260. dtype = PeriodDtypeBase(freq)
  1261. freq = dtype._freqstr
  1262. freq = to_offset(freq)
  1263. if freq.n <= 0:
  1264. raise ValueError("Frequency must be positive, because it "
  1265. f"represents span: {freq.freqstr}")
  1266. return freq
  1267. @classmethod
  1268. def _from_ordinal(cls, ordinal: int64_t, freq) -> "Period":
  1269. """
  1270. Fast creation from an ordinal and freq that are already validated!
  1271. """
  1272. if ordinal == NPY_NAT:
  1273. return NaT
  1274. else:
  1275. freq = cls._maybe_convert_freq(freq)
  1276. self = _Period.__new__(cls, ordinal, freq)
  1277. return self
  1278. def __richcmp__(self, other, op):
  1279. if is_period_object(other):
  1280. if other.freq != self.freq:
  1281. if op == Py_EQ:
  1282. return False
  1283. elif op == Py_NE:
  1284. return True
  1285. self._require_matching_freq(other)
  1286. return PyObject_RichCompareBool(self.ordinal, other.ordinal, op)
  1287. elif other is NaT:
  1288. return op == Py_NE
  1289. elif util.is_array(other):
  1290. # GH#44285
  1291. if cnp.PyArray_IsZeroDim(other):
  1292. return PyObject_RichCompare(self, other.item(), op)
  1293. else:
  1294. # in particular ndarray[object]; see test_pi_cmp_period
  1295. return np.array([PyObject_RichCompare(self, x, op) for x in other])
  1296. return NotImplemented
  1297. def __hash__(self):
  1298. return hash((self.ordinal, self.freqstr))
  1299. def _add_timedeltalike_scalar(self, other) -> "Period":
  1300. cdef:
  1301. int64_t inc
  1302. if not is_tick_object(self.freq):
  1303. raise IncompatibleFrequency("Input cannot be converted to "
  1304. f"Period(freq={self.freqstr})")
  1305. if (
  1306. util.is_timedelta64_object(other) and
  1307. get_timedelta64_value(other) == NPY_NAT
  1308. ):
  1309. # i.e. np.timedelta64("nat")
  1310. return NaT
  1311. try:
  1312. inc = delta_to_nanoseconds(other, reso=self.freq._creso, round_ok=False)
  1313. except ValueError as err:
  1314. raise IncompatibleFrequency("Input cannot be converted to "
  1315. f"Period(freq={self.freqstr})") from err
  1316. # TODO: overflow-check here
  1317. ordinal = self.ordinal + inc
  1318. return Period(ordinal=ordinal, freq=self.freq)
  1319. def _add_offset(self, other) -> "Period":
  1320. # Non-Tick DateOffset other
  1321. cdef:
  1322. int64_t ordinal
  1323. self._require_matching_freq(other, base=True)
  1324. ordinal = self.ordinal + other.n
  1325. return Period(ordinal=ordinal, freq=self.freq)
  1326. def __add__(self, other):
  1327. if not is_period_object(self):
  1328. # cython semantics; this is analogous to a call to __radd__
  1329. # TODO(cython3): remove this
  1330. if self is NaT:
  1331. return NaT
  1332. return other.__add__(self)
  1333. if is_any_td_scalar(other):
  1334. return self._add_timedeltalike_scalar(other)
  1335. elif is_offset_object(other):
  1336. return self._add_offset(other)
  1337. elif other is NaT:
  1338. return NaT
  1339. elif util.is_integer_object(other):
  1340. ordinal = self.ordinal + other * self.freq.n
  1341. return Period(ordinal=ordinal, freq=self.freq)
  1342. elif is_period_object(other):
  1343. # can't add datetime-like
  1344. # GH#17983; can't just return NotImplemented bc we get a RecursionError
  1345. # when called via np.add.reduce see TestNumpyReductions.test_add
  1346. # in npdev build
  1347. sname = type(self).__name__
  1348. oname = type(other).__name__
  1349. raise TypeError(f"unsupported operand type(s) for +: '{sname}' "
  1350. f"and '{oname}'")
  1351. elif util.is_array(other):
  1352. if other.dtype == object:
  1353. # GH#50162
  1354. return np.array([self + x for x in other], dtype=object)
  1355. return NotImplemented
def __radd__(self, other):
    # Reflected addition simply delegates to __add__
    return self.__add__(other)
  1358. def __sub__(self, other):
  1359. if not is_period_object(self):
  1360. # cython semantics; this is like a call to __rsub__
  1361. # TODO(cython3): remove this
  1362. if self is NaT:
  1363. return NaT
  1364. return NotImplemented
  1365. elif (
  1366. is_any_td_scalar(other)
  1367. or is_offset_object(other)
  1368. or util.is_integer_object(other)
  1369. ):
  1370. return self + (-other)
  1371. elif is_period_object(other):
  1372. self._require_matching_freq(other)
  1373. # GH 23915 - mul by base freq since __add__ is agnostic of n
  1374. return (self.ordinal - other.ordinal) * self.freq.base
  1375. elif other is NaT:
  1376. return NaT
  1377. elif util.is_array(other):
  1378. if other.dtype == object:
  1379. # GH#50162
  1380. return np.array([self - x for x in other], dtype=object)
  1381. return NotImplemented
  1382. def __rsub__(self, other):
  1383. if other is NaT:
  1384. return NaT
  1385. elif util.is_array(other):
  1386. if other.dtype == object:
  1387. # GH#50162
  1388. return np.array([x - self for x in other], dtype=object)
  1389. return NotImplemented
  1390. def asfreq(self, freq, how="E") -> "Period":
  1391. """
  1392. Convert Period to desired frequency, at the start or end of the interval.
  1393. Parameters
  1394. ----------
  1395. freq : str, BaseOffset
  1396. The desired frequency.
  1397. how : {'E', 'S', 'end', 'start'}, default 'end'
  1398. Start or end of the timespan.
  1399. Returns
  1400. -------
  1401. resampled : Period
  1402. """
  1403. freq = self._maybe_convert_freq(freq)
  1404. how = validate_end_alias(how)
  1405. base1 = self._dtype._dtype_code
  1406. base2 = freq_to_dtype_code(freq)
  1407. # self.n can't be negative or 0
  1408. end = how == "E"
  1409. if end:
  1410. ordinal = self.ordinal + self.freq.n - 1
  1411. else:
  1412. ordinal = self.ordinal
  1413. ordinal = period_asfreq(ordinal, base1, base2, end)
  1414. return Period(ordinal=ordinal, freq=freq)
  1415. def to_timestamp(self, freq=None, how="start") -> Timestamp:
  1416. """
  1417. Return the Timestamp representation of the Period.
  1418. Uses the target frequency specified at the part of the period specified
  1419. by `how`, which is either `Start` or `Finish`.
  1420. Parameters
  1421. ----------
  1422. freq : str or DateOffset
  1423. Target frequency. Default is 'D' if self.freq is week or
  1424. longer and 'S' otherwise.
  1425. how : str, default 'S' (start)
  1426. One of 'S', 'E'. Can be aliased as case insensitive
  1427. 'Start', 'Finish', 'Begin', 'End'.
  1428. Returns
  1429. -------
  1430. Timestamp
  1431. Examples
  1432. --------
  1433. >>> period = pd.Period('2023-1-1', freq='D')
  1434. >>> timestamp = period.to_timestamp()
  1435. >>> timestamp
  1436. Timestamp('2023-01-01 00:00:00')
  1437. """
  1438. how = validate_end_alias(how)
  1439. end = how == "E"
  1440. if end:
  1441. if freq == "B" or self.freq == "B":
  1442. # roll forward to ensure we land on B date
  1443. adjust = np.timedelta64(1, "D") - np.timedelta64(1, "ns")
  1444. return self.to_timestamp(how="start") + adjust
  1445. endpoint = (self + self.freq).to_timestamp(how="start")
  1446. return endpoint - np.timedelta64(1, "ns")
  1447. if freq is None:
  1448. freq = self._dtype._get_to_timestamp_base()
  1449. base = freq
  1450. else:
  1451. freq = self._maybe_convert_freq(freq)
  1452. base = freq._period_dtype_code
  1453. val = self.asfreq(freq, how)
  1454. dt64 = period_ordinal_to_dt64(val.ordinal, base)
  1455. return Timestamp(dt64)
  1456. @property
  1457. def year(self) -> int:
  1458. """
  1459. Return the year this Period falls on.
  1460. """
  1461. base = self._dtype._dtype_code
  1462. return pyear(self.ordinal, base)
  1463. @property
  1464. def month(self) -> int:
  1465. """
  1466. Return the month this Period falls on.
  1467. """
  1468. base = self._dtype._dtype_code
  1469. return pmonth(self.ordinal, base)
  1470. @property
  1471. def day(self) -> int:
  1472. """
  1473. Get day of the month that a Period falls on.
  1474. Returns
  1475. -------
  1476. int
  1477. See Also
  1478. --------
  1479. Period.dayofweek : Get the day of the week.
  1480. Period.dayofyear : Get the day of the year.
  1481. Examples
  1482. --------
  1483. >>> p = pd.Period("2018-03-11", freq='H')
  1484. >>> p.day
  1485. 11
  1486. """
  1487. base = self._dtype._dtype_code
  1488. return pday(self.ordinal, base)
  1489. @property
  1490. def hour(self) -> int:
  1491. """
  1492. Get the hour of the day component of the Period.
  1493. Returns
  1494. -------
  1495. int
  1496. The hour as an integer, between 0 and 23.
  1497. See Also
  1498. --------
  1499. Period.second : Get the second component of the Period.
  1500. Period.minute : Get the minute component of the Period.
  1501. Examples
  1502. --------
  1503. >>> p = pd.Period("2018-03-11 13:03:12.050000")
  1504. >>> p.hour
  1505. 13
  1506. Period longer than a day
  1507. >>> p = pd.Period("2018-03-11", freq="M")
  1508. >>> p.hour
  1509. 0
  1510. """
  1511. base = self._dtype._dtype_code
  1512. return phour(self.ordinal, base)
  1513. @property
  1514. def minute(self) -> int:
  1515. """
  1516. Get minute of the hour component of the Period.
  1517. Returns
  1518. -------
  1519. int
  1520. The minute as an integer, between 0 and 59.
  1521. See Also
  1522. --------
  1523. Period.hour : Get the hour component of the Period.
  1524. Period.second : Get the second component of the Period.
  1525. Examples
  1526. --------
  1527. >>> p = pd.Period("2018-03-11 13:03:12.050000")
  1528. >>> p.minute
  1529. 3
  1530. """
  1531. base = self._dtype._dtype_code
  1532. return pminute(self.ordinal, base)
  1533. @property
  1534. def second(self) -> int:
  1535. """
  1536. Get the second component of the Period.
  1537. Returns
  1538. -------
  1539. int
  1540. The second of the Period (ranges from 0 to 59).
  1541. See Also
  1542. --------
  1543. Period.hour : Get the hour component of the Period.
  1544. Period.minute : Get the minute component of the Period.
  1545. Examples
  1546. --------
  1547. >>> p = pd.Period("2018-03-11 13:03:12.050000")
  1548. >>> p.second
  1549. 12
  1550. """
  1551. base = self._dtype._dtype_code
  1552. return psecond(self.ordinal, base)
  1553. @property
  1554. def weekofyear(self) -> int:
  1555. """
  1556. Get the week of the year on the given Period.
  1557. Returns
  1558. -------
  1559. int
  1560. See Also
  1561. --------
  1562. Period.dayofweek : Get the day component of the Period.
  1563. Period.weekday : Get the day component of the Period.
  1564. Examples
  1565. --------
  1566. >>> p = pd.Period("2018-03-11", "H")
  1567. >>> p.weekofyear
  1568. 10
  1569. >>> p = pd.Period("2018-02-01", "D")
  1570. >>> p.weekofyear
  1571. 5
  1572. >>> p = pd.Period("2018-01-06", "D")
  1573. >>> p.weekofyear
  1574. 1
  1575. """
  1576. base = self._dtype._dtype_code
  1577. return pweek(self.ordinal, base)
@property
def week(self) -> int:
    """
    Get the week of the year on the given Period.

    Alias of `weekofyear`.

    Returns
    -------
    int

    See Also
    --------
    Period.dayofweek : Get the day component of the Period.
    Period.weekday : Get the day component of the Period.

    Examples
    --------
    >>> p = pd.Period("2018-03-11", "H")
    >>> p.week
    10

    >>> p = pd.Period("2018-02-01", "D")
    >>> p.week
    5

    >>> p = pd.Period("2018-01-06", "D")
    >>> p.week
    1
    """
    return self.weekofyear
  1602. @property
  1603. def day_of_week(self) -> int:
  1604. """
  1605. Day of the week the period lies in, with Monday=0 and Sunday=6.
  1606. If the period frequency is lower than daily (e.g. hourly), and the
  1607. period spans over multiple days, the day at the start of the period is
  1608. used.
  1609. If the frequency is higher than daily (e.g. monthly), the last day
  1610. of the period is used.
  1611. Returns
  1612. -------
  1613. int
  1614. Day of the week.
  1615. See Also
  1616. --------
  1617. Period.day_of_week : Day of the week the period lies in.
  1618. Period.weekday : Alias of Period.day_of_week.
  1619. Period.day : Day of the month.
  1620. Period.dayofyear : Day of the year.
  1621. Examples
  1622. --------
  1623. >>> per = pd.Period('2017-12-31 22:00', 'H')
  1624. >>> per.day_of_week
  1625. 6
  1626. For periods that span over multiple days, the day at the beginning of
  1627. the period is returned.
  1628. >>> per = pd.Period('2017-12-31 22:00', '4H')
  1629. >>> per.day_of_week
  1630. 6
  1631. >>> per.start_time.day_of_week
  1632. 6
  1633. For periods with a frequency higher than days, the last day of the
  1634. period is returned.
  1635. >>> per = pd.Period('2018-01', 'M')
  1636. >>> per.day_of_week
  1637. 2
  1638. >>> per.end_time.day_of_week
  1639. 2
  1640. """
  1641. base = self._dtype._dtype_code
  1642. return pweekday(self.ordinal, base)
@property
def weekday(self) -> int:
    """
    Day of the week the period lies in, with Monday=0 and Sunday=6.

    If the period's frequency is finer than daily (e.g. hourly) and the
    period spans over multiple days, the day at the start of the period is
    used.

    If the frequency is coarser than daily (e.g. monthly), the last day
    of the period is used.

    Returns
    -------
    int
        Day of the week.

    See Also
    --------
    Period.dayofweek : Day of the week the period lies in.
    Period.weekday : Alias of Period.dayofweek.
    Period.day : Day of the month.
    Period.dayofyear : Day of the year.

    Examples
    --------
    >>> per = pd.Period('2017-12-31 22:00', 'H')
    >>> per.dayofweek
    6

    For periods that span over multiple days, the day at the beginning of
    the period is returned.

    >>> per = pd.Period('2017-12-31 22:00', '4H')
    >>> per.dayofweek
    6
    >>> per.start_time.dayofweek
    6

    For periods with a frequency coarser than days, the last day of the
    period is returned.

    >>> per = pd.Period('2018-01', 'M')
    >>> per.dayofweek
    2
    >>> per.end_time.dayofweek
    2
    """
    # Docstring is a duplicate from dayofweek. Reusing docstrings with
    # Appender doesn't work for properties in Cython files, and setting
    # the __doc__ attribute is also not possible.
    return self.dayofweek
  1686. @property
  1687. def day_of_year(self) -> int:
  1688. """
  1689. Return the day of the year.
  1690. This attribute returns the day of the year on which the particular
  1691. date occurs. The return value ranges between 1 to 365 for regular
  1692. years and 1 to 366 for leap years.
  1693. Returns
  1694. -------
  1695. int
  1696. The day of year.
  1697. See Also
  1698. --------
  1699. Period.day : Return the day of the month.
  1700. Period.day_of_week : Return the day of week.
  1701. PeriodIndex.day_of_year : Return the day of year of all indexes.
  1702. Examples
  1703. --------
  1704. >>> period = pd.Period("2015-10-23", freq='H')
  1705. >>> period.day_of_year
  1706. 296
  1707. >>> period = pd.Period("2012-12-31", freq='D')
  1708. >>> period.day_of_year
  1709. 366
  1710. >>> period = pd.Period("2013-01-01", freq='D')
  1711. >>> period.day_of_year
  1712. 1
  1713. """
  1714. base = self._dtype._dtype_code
  1715. return pday_of_year(self.ordinal, base)
  1716. @property
  1717. def quarter(self) -> int:
  1718. """
  1719. Return the quarter this Period falls on.
  1720. """
  1721. base = self._dtype._dtype_code
  1722. return pquarter(self.ordinal, base)
  1723. @property
  1724. def qyear(self) -> int:
  1725. """
  1726. Fiscal year the Period lies in according to its starting-quarter.
  1727. The `year` and the `qyear` of the period will be the same if the fiscal
  1728. and calendar years are the same. When they are not, the fiscal year
  1729. can be different from the calendar year of the period.
  1730. Returns
  1731. -------
  1732. int
  1733. The fiscal year of the period.
  1734. See Also
  1735. --------
  1736. Period.year : Return the calendar year of the period.
  1737. Examples
  1738. --------
  1739. If the natural and fiscal year are the same, `qyear` and `year` will
  1740. be the same.
  1741. >>> per = pd.Period('2018Q1', freq='Q')
  1742. >>> per.qyear
  1743. 2018
  1744. >>> per.year
  1745. 2018
  1746. If the fiscal year starts in April (`Q-MAR`), the first quarter of
  1747. 2018 will start in April 2017. `year` will then be 2017, but `qyear`
  1748. will be the fiscal year, 2018.
  1749. >>> per = pd.Period('2018Q1', freq='Q-MAR')
  1750. >>> per.start_time
  1751. Timestamp('2017-04-01 00:00:00')
  1752. >>> per.qyear
  1753. 2018
  1754. >>> per.year
  1755. 2017
  1756. """
  1757. base = self._dtype._dtype_code
  1758. return pqyear(self.ordinal, base)
  1759. @property
  1760. def days_in_month(self) -> int:
  1761. """
  1762. Get the total number of days in the month that this period falls on.
  1763. Returns
  1764. -------
  1765. int
  1766. See Also
  1767. --------
  1768. Period.daysinmonth : Gets the number of days in the month.
  1769. DatetimeIndex.daysinmonth : Gets the number of days in the month.
  1770. calendar.monthrange : Returns a tuple containing weekday
  1771. (0-6 ~ Mon-Sun) and number of days (28-31).
  1772. Examples
  1773. --------
  1774. >>> p = pd.Period('2018-2-17')
  1775. >>> p.days_in_month
  1776. 28
  1777. >>> pd.Period('2018-03-01').days_in_month
  1778. 31
  1779. Handles the leap year case as well:
  1780. >>> p = pd.Period('2016-2-17')
  1781. >>> p.days_in_month
  1782. 29
  1783. """
  1784. base = self._dtype._dtype_code
  1785. return pdays_in_month(self.ordinal, base)
  1786. @property
  1787. def daysinmonth(self) -> int:
  1788. """
  1789. Get the total number of days of the month that this period falls on.
  1790. Returns
  1791. -------
  1792. int
  1793. See Also
  1794. --------
  1795. Period.days_in_month : Return the days of the month.
  1796. Period.dayofyear : Return the day of the year.
  1797. Examples
  1798. --------
  1799. >>> p = pd.Period("2018-03-11", freq='H')
  1800. >>> p.daysinmonth
  1801. 31
  1802. """
  1803. return self.days_in_month
  1804. @property
  1805. def is_leap_year(self) -> bool:
  1806. """
  1807. Return True if the period's year is in a leap year.
  1808. """
  1809. return bool(is_leapyear(self.year))
  1810. @classmethod
  1811. def now(cls, freq):
  1812. """
  1813. Return the period of now's date.
  1814. Parameters
  1815. ----------
  1816. freq : str, BaseOffset
  1817. Frequency to use for the returned period.
  1818. """
  1819. return Period(datetime.now(), freq=freq)
  1820. @property
  1821. def freqstr(self) -> str:
  1822. """
  1823. Return a string representation of the frequency.
  1824. """
  1825. return self.freq.freqstr
  1826. def __repr__(self) -> str:
  1827. base = self._dtype._dtype_code
  1828. formatted = period_format(self.ordinal, base)
  1829. return f"Period('{formatted}', '{self.freqstr}')"
  1830. def __str__(self) -> str:
  1831. """
  1832. Return a string representation for a particular DataFrame
  1833. """
  1834. base = self._dtype._dtype_code
  1835. formatted = period_format(self.ordinal, base)
  1836. value = str(formatted)
  1837. return value
  1838. def __setstate__(self, state):
  1839. self.freq = state[1]
  1840. self.ordinal = state[2]
  1841. def __reduce__(self):
  1842. object_state = None, self.freq, self.ordinal
  1843. return (Period, object_state)
  1844. def strftime(self, fmt: str) -> str:
  1845. r"""
  1846. Returns a formatted string representation of the :class:`Period`.
  1847. ``fmt`` must be a string containing one or several directives.
  1848. The method recognizes the same directives as the :func:`time.strftime`
  1849. function of the standard Python distribution, as well as the specific
  1850. additional directives ``%f``, ``%F``, ``%q``, ``%l``, ``%u``, ``%n``.
  1851. (formatting & docs originally from scikits.timeries).
  1852. +-----------+--------------------------------+-------+
  1853. | Directive | Meaning | Notes |
  1854. +===========+================================+=======+
  1855. | ``%a`` | Locale's abbreviated weekday | |
  1856. | | name. | |
  1857. +-----------+--------------------------------+-------+
  1858. | ``%A`` | Locale's full weekday name. | |
  1859. +-----------+--------------------------------+-------+
  1860. | ``%b`` | Locale's abbreviated month | |
  1861. | | name. | |
  1862. +-----------+--------------------------------+-------+
  1863. | ``%B`` | Locale's full month name. | |
  1864. +-----------+--------------------------------+-------+
  1865. | ``%c`` | Locale's appropriate date and | |
  1866. | | time representation. | |
  1867. +-----------+--------------------------------+-------+
  1868. | ``%d`` | Day of the month as a decimal | |
  1869. | | number [01,31]. | |
  1870. +-----------+--------------------------------+-------+
  1871. | ``%f`` | 'Fiscal' year without a | \(1) |
  1872. | | century as a decimal number | |
  1873. | | [00,99] | |
  1874. +-----------+--------------------------------+-------+
  1875. | ``%F`` | 'Fiscal' year with a century | \(2) |
  1876. | | as a decimal number | |
  1877. +-----------+--------------------------------+-------+
  1878. | ``%H`` | Hour (24-hour clock) as a | |
  1879. | | decimal number [00,23]. | |
  1880. +-----------+--------------------------------+-------+
  1881. | ``%I`` | Hour (12-hour clock) as a | |
  1882. | | decimal number [01,12]. | |
  1883. +-----------+--------------------------------+-------+
  1884. | ``%j`` | Day of the year as a decimal | |
  1885. | | number [001,366]. | |
  1886. +-----------+--------------------------------+-------+
  1887. | ``%m`` | Month as a decimal number | |
  1888. | | [01,12]. | |
  1889. +-----------+--------------------------------+-------+
  1890. | ``%M`` | Minute as a decimal number | |
  1891. | | [00,59]. | |
  1892. +-----------+--------------------------------+-------+
  1893. | ``%p`` | Locale's equivalent of either | \(3) |
  1894. | | AM or PM. | |
  1895. +-----------+--------------------------------+-------+
  1896. | ``%q`` | Quarter as a decimal number | |
  1897. | | [1,4] | |
  1898. +-----------+--------------------------------+-------+
  1899. | ``%S`` | Second as a decimal number | \(4) |
  1900. | | [00,61]. | |
  1901. +-----------+--------------------------------+-------+
  1902. | ``%l`` | Millisecond as a decimal number| |
  1903. | | [000,999]. | |
  1904. +-----------+--------------------------------+-------+
  1905. | ``%u`` | Microsecond as a decimal number| |
  1906. | | [000000,999999]. | |
  1907. +-----------+--------------------------------+-------+
  1908. | ``%n`` | Nanosecond as a decimal number | |
  1909. | | [000000000,999999999]. | |
  1910. +-----------+--------------------------------+-------+
  1911. | ``%U`` | Week number of the year | \(5) |
  1912. | | (Sunday as the first day of | |
  1913. | | the week) as a decimal number | |
  1914. | | [00,53]. All days in a new | |
  1915. | | year preceding the first | |
  1916. | | Sunday are considered to be in | |
  1917. | | week 0. | |
  1918. +-----------+--------------------------------+-------+
  1919. | ``%w`` | Weekday as a decimal number | |
  1920. | | [0(Sunday),6]. | |
  1921. +-----------+--------------------------------+-------+
  1922. | ``%W`` | Week number of the year | \(5) |
  1923. | | (Monday as the first day of | |
  1924. | | the week) as a decimal number | |
  1925. | | [00,53]. All days in a new | |
  1926. | | year preceding the first | |
  1927. | | Monday are considered to be in | |
  1928. | | week 0. | |
  1929. +-----------+--------------------------------+-------+
  1930. | ``%x`` | Locale's appropriate date | |
  1931. | | representation. | |
  1932. +-----------+--------------------------------+-------+
  1933. | ``%X`` | Locale's appropriate time | |
  1934. | | representation. | |
  1935. +-----------+--------------------------------+-------+
  1936. | ``%y`` | Year without century as a | |
  1937. | | decimal number [00,99]. | |
  1938. +-----------+--------------------------------+-------+
  1939. | ``%Y`` | Year with century as a decimal | |
  1940. | | number. | |
  1941. +-----------+--------------------------------+-------+
  1942. | ``%Z`` | Time zone name (no characters | |
  1943. | | if no time zone exists). | |
  1944. +-----------+--------------------------------+-------+
  1945. | ``%%`` | A literal ``'%'`` character. | |
  1946. +-----------+--------------------------------+-------+
  1947. Notes
  1948. -----
  1949. (1)
  1950. The ``%f`` directive is the same as ``%y`` if the frequency is
  1951. not quarterly.
  1952. Otherwise, it corresponds to the 'fiscal' year, as defined by
  1953. the :attr:`qyear` attribute.
  1954. (2)
  1955. The ``%F`` directive is the same as ``%Y`` if the frequency is
  1956. not quarterly.
  1957. Otherwise, it corresponds to the 'fiscal' year, as defined by
  1958. the :attr:`qyear` attribute.
  1959. (3)
  1960. The ``%p`` directive only affects the output hour field
  1961. if the ``%I`` directive is used to parse the hour.
  1962. (4)
  1963. The range really is ``0`` to ``61``; this accounts for leap
  1964. seconds and the (very rare) double leap seconds.
  1965. (5)
  1966. The ``%U`` and ``%W`` directives are only used in calculations
  1967. when the day of the week and the year are specified.
  1968. Examples
  1969. --------
  1970. >>> from pandas import Period
  1971. >>> a = Period(freq='Q-JUL', year=2006, quarter=1)
  1972. >>> a.strftime('%F-Q%q')
  1973. '2006-Q1'
  1974. >>> # Output the last month in the quarter of this date
  1975. >>> a.strftime('%b-%Y')
  1976. 'Oct-2005'
  1977. >>>
  1978. >>> a = Period(freq='D', year=2001, month=1, day=1)
  1979. >>> a.strftime('%d-%b-%Y')
  1980. '01-Jan-2001'
  1981. >>> a.strftime('%b. %d, %Y was a %A')
  1982. 'Jan. 01, 2001 was a Monday'
  1983. """
  1984. base = self._dtype._dtype_code
  1985. return period_format(self.ordinal, base, fmt)
class Period(_Period):
    """
    Represents a period of time.

    Parameters
    ----------
    value : Period or str, default None
        The time period represented (e.g., '4Q2005'). This represents neither
        the start nor the end of the period, but rather the entire period
        itself. The constructor also accepts integers (parsed via their string
        form), ``datetime``/``date`` objects and ``numpy.datetime64`` values;
        the datetime-like inputs require ``freq``.
    freq : str, default None
        One of pandas period strings or corresponding objects. Accepted
        strings are listed in the
        :ref:`offset alias section <timeseries.offset_aliases>` in the user docs.
    ordinal : int, default None
        The period offset from the proleptic Gregorian epoch.
    year : int, default None
        Year value of the period.
    month : int, default 1
        Month value of the period.
    quarter : int, default None
        Quarter value of the period.
    day : int, default 1
        Day value of the period.
    hour : int, default 0
        Hour value of the period.
    minute : int, default 0
        Minute value of the period.
    second : int, default 0
        Second value of the period.

    Raises
    ------
    ValueError
        If both ``value`` and ``ordinal`` are given, if ``ordinal`` is not an
        integer or is given without ``freq``, if field values are given
        without ``freq``, if a datetime-like ``value`` is given without
        ``freq``, or if ``value`` has an unsupported type.

    Examples
    --------
    >>> period = pd.Period('2012-1-1', freq='D')
    >>> period
    Period('2012-01-01', 'D')
    """

    def __new__(cls, value=None, freq=None, ordinal=None,
                year=None, month=None, quarter=None, day=None,
                hour=None, minute=None, second=None):
        # freq points to a tuple (base, mult); base is one of the defined
        # periods such as A, Q, etc. Every five minutes would be, e.g.,
        # ('T', 5) but may be passed in as a string like '5T'

        # ordinal is the period offset from the gregorian proleptic epoch

        if freq is not None:
            freq = cls._maybe_convert_freq(freq)
        # Only overwritten by branches whose input carries nanoseconds.
        nanosecond = 0

        if ordinal is not None and value is not None:
            # NOTE(review): the message repeats "but not both"; it is left
            # untouched here because callers may match on the exact text.
            raise ValueError("Only value or ordinal but not both should be "
                             "given but not both")
        elif ordinal is not None:
            # Explicit ordinal: validate it and use it as-is.
            if not util.is_integer_object(ordinal):
                raise ValueError("Ordinal must be an integer")
            if freq is None:
                raise ValueError("Must supply freq for ordinal value")

        elif value is None:
            if (year is None and month is None and
                    quarter is None and day is None and
                    hour is None and minute is None and second is None):
                # Neither a value nor any field was given.
                ordinal = NPY_NAT
            else:
                if freq is None:
                    raise ValueError("If value is None, freq cannot be None")

                # set defaults
                month = 1 if month is None else month
                day = 1 if day is None else day
                hour = 0 if hour is None else hour
                minute = 0 if minute is None else minute
                second = 0 if second is None else second

                ordinal = _ordinal_from_fields(year, month, quarter, day,
                                               hour, minute, second, freq)

        elif is_period_object(value):
            other = value
            if freq is None or freq._period_dtype_code == other.freq._period_dtype_code:
                # Same dtype code (or no freq requested): reuse the other
                # period's ordinal and freq.
                ordinal = other.ordinal
                freq = other.freq
            else:
                converted = other.asfreq(freq)
                ordinal = converted.ordinal

        elif checknull_with_nat(value) or (isinstance(value, str) and
                                           (value in nat_strings or len(value) == 0)):
            # explicit str check is necessary to avoid raising incorrectly
            #  if we have a non-hashable value.
            ordinal = NPY_NAT

        elif isinstance(value, str) or util.is_integer_object(value):
            if util.is_integer_object(value):
                if value == NPY_NAT:
                    value = "NaT"

            # Integers are parsed through their string form.
            value = str(value)
            value = value.upper()

            freqstr = freq.rule_code if freq is not None else None
            try:
                dt, reso = parse_datetime_string_with_reso(value, freqstr)
            except ValueError as err:
                match = re.search(r"^\d{4}-\d{2}-\d{2}/\d{4}-\d{2}-\d{2}", value)
                if match:
                    # Case that cannot be parsed (correctly) by our datetime
                    #  parsing logic
                    dt, freq = _parse_weekly_str(value, freq)
                else:
                    raise err

            else:
                if reso == "nanosecond":
                    nanosecond = dt.nanosecond
                if dt is NaT:
                    ordinal = NPY_NAT

                if freq is None and ordinal != NPY_NAT:
                    # Skip NaT, since it doesn't have a resolution
                    freq = attrname_to_abbrevs[reso]
                    freq = to_offset(freq)

        elif PyDateTime_Check(value):
            dt = value
            if freq is None:
                raise ValueError("Must supply freq for datetime value")
            if isinstance(dt, Timestamp):
                # Only Timestamp is read for nanoseconds in this branch.
                nanosecond = dt.nanosecond
        elif util.is_datetime64_object(value):
            dt = Timestamp(value)
            if freq is None:
                raise ValueError("Must supply freq for datetime value")
            nanosecond = dt.nanosecond
        elif PyDate_Check(value):
            # Build a datetime from the date's year/month/day only.
            dt = datetime(year=value.year, month=value.month, day=value.day)
            if freq is None:
                raise ValueError("Must supply freq for datetime value")
        else:
            msg = "Value must be Period, string, integer, or datetime"
            raise ValueError(msg)

        if ordinal is None:
            # Reached only by the branches above that produced ``dt`` without
            # setting ``ordinal``; derive the ordinal from dt's fields.
            base = freq_to_dtype_code(freq)
            ordinal = period_ordinal(dt.year, dt.month, dt.day,
                                     dt.hour, dt.minute, dt.second,
                                     dt.microsecond, 1000*nanosecond, base)

        return cls._from_ordinal(ordinal, freq)
  2117. cdef bint is_period_object(object obj):
  2118. return isinstance(obj, _Period)
  2119. cpdef int freq_to_dtype_code(BaseOffset freq) except? -1:
  2120. try:
  2121. return freq._period_dtype_code
  2122. except AttributeError as err:
  2123. raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err
  2124. cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day,
  2125. int hour, int minute, int second,
  2126. BaseOffset freq):
  2127. base = freq_to_dtype_code(freq)
  2128. if quarter is not None:
  2129. year, month = quarter_to_myear(year, quarter, freq.freqstr)
  2130. return period_ordinal(year, month, day, hour,
  2131. minute, second, 0, 0, base)
  2132. def validate_end_alias(how: str) -> str: # Literal["E", "S"]
  2133. how_dict = {"S": "S", "E": "E",
  2134. "START": "S", "FINISH": "E",
  2135. "BEGIN": "S", "END": "E"}
  2136. how = how_dict.get(str(how).upper())
  2137. if how not in {"S", "E"}:
  2138. raise ValueError("How must be one of S or E")
  2139. return how
  2140. cdef _parse_weekly_str(value, BaseOffset freq):
  2141. """
  2142. Parse e.g. "2017-01-23/2017-01-29", which cannot be parsed by the general
  2143. datetime-parsing logic. This ensures that we can round-trip with
  2144. Period.__str__ with weekly freq.
  2145. """
  2146. # GH#50803
  2147. start, end = value.split("/")
  2148. start = Timestamp(start)
  2149. end = Timestamp(end)
  2150. if (end - start).days != 6:
  2151. # We are interested in cases where this is str(period)
  2152. # of a Week-freq period
  2153. raise ValueError("Could not parse as weekly-freq Period")
  2154. if freq is None:
  2155. day_name = end.day_name()[:3].upper()
  2156. freqstr = f"W-{day_name}"
  2157. freq = to_offset(freqstr)
  2158. # We _should_ have freq.is_on_offset(end)
  2159. return end, freq