accessors.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580
  1. """
  2. datetimelike delegation
  3. """
  4. from __future__ import annotations
  5. from typing import (
  6. TYPE_CHECKING,
  7. cast,
  8. )
  9. import numpy as np
  10. from pandas.core.dtypes.common import (
  11. is_categorical_dtype,
  12. is_datetime64_dtype,
  13. is_datetime64tz_dtype,
  14. is_integer_dtype,
  15. is_list_like,
  16. is_period_dtype,
  17. is_timedelta64_dtype,
  18. )
  19. from pandas.core.dtypes.generic import ABCSeries
  20. from pandas.core.accessor import (
  21. PandasDelegate,
  22. delegate_names,
  23. )
  24. from pandas.core.arrays import (
  25. DatetimeArray,
  26. PeriodArray,
  27. TimedeltaArray,
  28. )
  29. from pandas.core.arrays.arrow.array import ArrowExtensionArray
  30. from pandas.core.arrays.arrow.dtype import ArrowDtype
  31. from pandas.core.base import (
  32. NoNewAttributesMixin,
  33. PandasObject,
  34. )
  35. from pandas.core.indexes.datetimes import DatetimeIndex
  36. from pandas.core.indexes.timedeltas import TimedeltaIndex
  37. if TYPE_CHECKING:
  38. from pandas import (
  39. DataFrame,
  40. Series,
  41. )
  42. class Properties(PandasDelegate, PandasObject, NoNewAttributesMixin):
  43. _hidden_attrs = PandasObject._hidden_attrs | {
  44. "orig",
  45. "name",
  46. }
  47. def __init__(self, data: Series, orig) -> None:
  48. if not isinstance(data, ABCSeries):
  49. raise TypeError(
  50. f"cannot convert an object of type {type(data)} to a datetimelike index"
  51. )
  52. self._parent = data
  53. self.orig = orig
  54. self.name = getattr(data, "name", None)
  55. self._freeze()
  56. def _get_values(self):
  57. data = self._parent
  58. if is_datetime64_dtype(data.dtype):
  59. return DatetimeIndex(data, copy=False, name=self.name)
  60. elif is_datetime64tz_dtype(data.dtype):
  61. return DatetimeIndex(data, copy=False, name=self.name)
  62. elif is_timedelta64_dtype(data.dtype):
  63. return TimedeltaIndex(data, copy=False, name=self.name)
  64. elif is_period_dtype(data.dtype):
  65. return PeriodArray(data, copy=False)
  66. raise TypeError(
  67. f"cannot convert an object of type {type(data)} to a datetimelike index"
  68. )
  69. def _delegate_property_get(self, name):
  70. from pandas import Series
  71. values = self._get_values()
  72. result = getattr(values, name)
  73. # maybe need to upcast (ints)
  74. if isinstance(result, np.ndarray):
  75. if is_integer_dtype(result):
  76. result = result.astype("int64")
  77. elif not is_list_like(result):
  78. return result
  79. result = np.asarray(result)
  80. if self.orig is not None:
  81. index = self.orig.index
  82. else:
  83. index = self._parent.index
  84. # return the result as a Series
  85. result = Series(result, index=index, name=self.name).__finalize__(self._parent)
  86. # setting this object will show a SettingWithCopyWarning/Error
  87. result._is_copy = (
  88. "modifications to a property of a datetimelike "
  89. "object are not supported and are discarded. "
  90. "Change values on the original."
  91. )
  92. return result
  93. def _delegate_property_set(self, name, value, *args, **kwargs):
  94. raise ValueError(
  95. "modifications to a property of a datetimelike object are not supported. "
  96. "Change values on the original."
  97. )
  98. def _delegate_method(self, name, *args, **kwargs):
  99. from pandas import Series
  100. values = self._get_values()
  101. method = getattr(values, name)
  102. result = method(*args, **kwargs)
  103. if not is_list_like(result):
  104. return result
  105. result = Series(result, index=self._parent.index, name=self.name).__finalize__(
  106. self._parent
  107. )
  108. # setting this object will show a SettingWithCopyWarning/Error
  109. result._is_copy = (
  110. "modifications to a method of a datetimelike "
  111. "object are not supported and are discarded. "
  112. "Change values on the original."
  113. )
  114. return result
  115. @delegate_names(
  116. delegate=ArrowExtensionArray,
  117. accessors=DatetimeArray._datetimelike_ops,
  118. typ="property",
  119. accessor_mapping=lambda x: f"_dt_{x}",
  120. raise_on_missing=False,
  121. )
  122. @delegate_names(
  123. delegate=ArrowExtensionArray,
  124. accessors=DatetimeArray._datetimelike_methods,
  125. typ="method",
  126. accessor_mapping=lambda x: f"_dt_{x}",
  127. raise_on_missing=False,
  128. )
  129. class ArrowTemporalProperties(PandasDelegate, PandasObject, NoNewAttributesMixin):
  130. def __init__(self, data: Series, orig) -> None:
  131. if not isinstance(data, ABCSeries):
  132. raise TypeError(
  133. f"cannot convert an object of type {type(data)} to a datetimelike index"
  134. )
  135. self._parent = data
  136. self._orig = orig
  137. self._freeze()
  138. def _delegate_property_get(self, name: str): # type: ignore[override]
  139. if not hasattr(self._parent.array, f"_dt_{name}"):
  140. raise NotImplementedError(
  141. f"dt.{name} is not supported for {self._parent.dtype}"
  142. )
  143. result = getattr(self._parent.array, f"_dt_{name}")
  144. if not is_list_like(result):
  145. return result
  146. if self._orig is not None:
  147. index = self._orig.index
  148. else:
  149. index = self._parent.index
  150. # return the result as a Series, which is by definition a copy
  151. result = type(self._parent)(
  152. result, index=index, name=self._parent.name
  153. ).__finalize__(self._parent)
  154. return result
  155. def _delegate_method(self, name: str, *args, **kwargs):
  156. if not hasattr(self._parent.array, f"_dt_{name}"):
  157. raise NotImplementedError(
  158. f"dt.{name} is not supported for {self._parent.dtype}"
  159. )
  160. result = getattr(self._parent.array, f"_dt_{name}")(*args, **kwargs)
  161. if self._orig is not None:
  162. index = self._orig.index
  163. else:
  164. index = self._parent.index
  165. # return the result as a Series, which is by definition a copy
  166. result = type(self._parent)(
  167. result, index=index, name=self._parent.name
  168. ).__finalize__(self._parent)
  169. return result
  170. def to_pydatetime(self):
  171. return cast(ArrowExtensionArray, self._parent.array)._dt_to_pydatetime()
  172. def isocalendar(self):
  173. from pandas import DataFrame
  174. result = (
  175. cast(ArrowExtensionArray, self._parent.array)
  176. ._dt_isocalendar()
  177. ._data.combine_chunks()
  178. )
  179. iso_calendar_df = DataFrame(
  180. {
  181. col: type(self._parent.array)(result.field(i)) # type: ignore[call-arg]
  182. for i, col in enumerate(["year", "week", "day"])
  183. }
  184. )
  185. return iso_calendar_df
  186. @delegate_names(
  187. delegate=DatetimeArray,
  188. accessors=DatetimeArray._datetimelike_ops + ["unit"],
  189. typ="property",
  190. )
  191. @delegate_names(
  192. delegate=DatetimeArray,
  193. accessors=DatetimeArray._datetimelike_methods + ["as_unit"],
  194. typ="method",
  195. )
  196. class DatetimeProperties(Properties):
  197. """
  198. Accessor object for datetimelike properties of the Series values.
  199. Examples
  200. --------
  201. >>> seconds_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="s"))
  202. >>> seconds_series
  203. 0 2000-01-01 00:00:00
  204. 1 2000-01-01 00:00:01
  205. 2 2000-01-01 00:00:02
  206. dtype: datetime64[ns]
  207. >>> seconds_series.dt.second
  208. 0 0
  209. 1 1
  210. 2 2
  211. dtype: int32
  212. >>> hours_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="h"))
  213. >>> hours_series
  214. 0 2000-01-01 00:00:00
  215. 1 2000-01-01 01:00:00
  216. 2 2000-01-01 02:00:00
  217. dtype: datetime64[ns]
  218. >>> hours_series.dt.hour
  219. 0 0
  220. 1 1
  221. 2 2
  222. dtype: int32
  223. >>> quarters_series = pd.Series(pd.date_range("2000-01-01", periods=3, freq="q"))
  224. >>> quarters_series
  225. 0 2000-03-31
  226. 1 2000-06-30
  227. 2 2000-09-30
  228. dtype: datetime64[ns]
  229. >>> quarters_series.dt.quarter
  230. 0 1
  231. 1 2
  232. 2 3
  233. dtype: int32
  234. Returns a Series indexed like the original Series.
  235. Raises TypeError if the Series does not contain datetimelike values.
  236. """
  237. def to_pydatetime(self) -> np.ndarray:
  238. """
  239. Return the data as an array of :class:`datetime.datetime` objects.
  240. Timezone information is retained if present.
  241. .. warning::
  242. Python's datetime uses microsecond resolution, which is lower than
  243. pandas (nanosecond). The values are truncated.
  244. Returns
  245. -------
  246. numpy.ndarray
  247. Object dtype array containing native Python datetime objects.
  248. See Also
  249. --------
  250. datetime.datetime : Standard library value for a datetime.
  251. Examples
  252. --------
  253. >>> s = pd.Series(pd.date_range('20180310', periods=2))
  254. >>> s
  255. 0 2018-03-10
  256. 1 2018-03-11
  257. dtype: datetime64[ns]
  258. >>> s.dt.to_pydatetime()
  259. array([datetime.datetime(2018, 3, 10, 0, 0),
  260. datetime.datetime(2018, 3, 11, 0, 0)], dtype=object)
  261. pandas' nanosecond precision is truncated to microseconds.
  262. >>> s = pd.Series(pd.date_range('20180310', periods=2, freq='ns'))
  263. >>> s
  264. 0 2018-03-10 00:00:00.000000000
  265. 1 2018-03-10 00:00:00.000000001
  266. dtype: datetime64[ns]
  267. >>> s.dt.to_pydatetime()
  268. array([datetime.datetime(2018, 3, 10, 0, 0),
  269. datetime.datetime(2018, 3, 10, 0, 0)], dtype=object)
  270. """
  271. return self._get_values().to_pydatetime()
  272. @property
  273. def freq(self):
  274. return self._get_values().inferred_freq
  275. def isocalendar(self) -> DataFrame:
  276. """
  277. Calculate year, week, and day according to the ISO 8601 standard.
  278. .. versionadded:: 1.1.0
  279. Returns
  280. -------
  281. DataFrame
  282. With columns year, week and day.
  283. See Also
  284. --------
  285. Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
  286. week number, and weekday for the given Timestamp object.
  287. datetime.date.isocalendar : Return a named tuple object with
  288. three components: year, week and weekday.
  289. Examples
  290. --------
  291. >>> ser = pd.to_datetime(pd.Series(["2010-01-01", pd.NaT]))
  292. >>> ser.dt.isocalendar()
  293. year week day
  294. 0 2009 53 5
  295. 1 <NA> <NA> <NA>
  296. >>> ser.dt.isocalendar().week
  297. 0 53
  298. 1 <NA>
  299. Name: week, dtype: UInt32
  300. """
  301. return self._get_values().isocalendar().set_index(self._parent.index)
  302. @delegate_names(
  303. delegate=TimedeltaArray, accessors=TimedeltaArray._datetimelike_ops, typ="property"
  304. )
  305. @delegate_names(
  306. delegate=TimedeltaArray,
  307. accessors=TimedeltaArray._datetimelike_methods,
  308. typ="method",
  309. )
  310. class TimedeltaProperties(Properties):
  311. """
  312. Accessor object for datetimelike properties of the Series values.
  313. Returns a Series indexed like the original Series.
  314. Raises TypeError if the Series does not contain datetimelike values.
  315. Examples
  316. --------
  317. >>> seconds_series = pd.Series(
  318. ... pd.timedelta_range(start="1 second", periods=3, freq="S")
  319. ... )
  320. >>> seconds_series
  321. 0 0 days 00:00:01
  322. 1 0 days 00:00:02
  323. 2 0 days 00:00:03
  324. dtype: timedelta64[ns]
  325. >>> seconds_series.dt.seconds
  326. 0 1
  327. 1 2
  328. 2 3
  329. dtype: int32
  330. """
  331. def to_pytimedelta(self) -> np.ndarray:
  332. """
  333. Return an array of native :class:`datetime.timedelta` objects.
  334. Python's standard `datetime` library uses a different representation
  335. timedelta's. This method converts a Series of pandas Timedeltas
  336. to `datetime.timedelta` format with the same length as the original
  337. Series.
  338. Returns
  339. -------
  340. numpy.ndarray
  341. Array of 1D containing data with `datetime.timedelta` type.
  342. See Also
  343. --------
  344. datetime.timedelta : A duration expressing the difference
  345. between two date, time, or datetime.
  346. Examples
  347. --------
  348. >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="d"))
  349. >>> s
  350. 0 0 days
  351. 1 1 days
  352. 2 2 days
  353. 3 3 days
  354. 4 4 days
  355. dtype: timedelta64[ns]
  356. >>> s.dt.to_pytimedelta()
  357. array([datetime.timedelta(0), datetime.timedelta(days=1),
  358. datetime.timedelta(days=2), datetime.timedelta(days=3),
  359. datetime.timedelta(days=4)], dtype=object)
  360. """
  361. return self._get_values().to_pytimedelta()
  362. @property
  363. def components(self):
  364. """
  365. Return a Dataframe of the components of the Timedeltas.
  366. Returns
  367. -------
  368. DataFrame
  369. Examples
  370. --------
  371. >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit='s'))
  372. >>> s
  373. 0 0 days 00:00:00
  374. 1 0 days 00:00:01
  375. 2 0 days 00:00:02
  376. 3 0 days 00:00:03
  377. 4 0 days 00:00:04
  378. dtype: timedelta64[ns]
  379. >>> s.dt.components
  380. days hours minutes seconds milliseconds microseconds nanoseconds
  381. 0 0 0 0 0 0 0 0
  382. 1 0 0 0 1 0 0 0
  383. 2 0 0 0 2 0 0 0
  384. 3 0 0 0 3 0 0 0
  385. 4 0 0 0 4 0 0 0
  386. """
  387. return (
  388. self._get_values()
  389. .components.set_index(self._parent.index)
  390. .__finalize__(self._parent)
  391. )
  392. @property
  393. def freq(self):
  394. return self._get_values().inferred_freq
  395. @delegate_names(
  396. delegate=PeriodArray, accessors=PeriodArray._datetimelike_ops, typ="property"
  397. )
  398. @delegate_names(
  399. delegate=PeriodArray, accessors=PeriodArray._datetimelike_methods, typ="method"
  400. )
  401. class PeriodProperties(Properties):
  402. """
  403. Accessor object for datetimelike properties of the Series values.
  404. Returns a Series indexed like the original Series.
  405. Raises TypeError if the Series does not contain datetimelike values.
  406. Examples
  407. --------
  408. >>> seconds_series = pd.Series(
  409. ... pd.period_range(
  410. ... start="2000-01-01 00:00:00", end="2000-01-01 00:00:03", freq="s"
  411. ... )
  412. ... )
  413. >>> seconds_series
  414. 0 2000-01-01 00:00:00
  415. 1 2000-01-01 00:00:01
  416. 2 2000-01-01 00:00:02
  417. 3 2000-01-01 00:00:03
  418. dtype: period[S]
  419. >>> seconds_series.dt.second
  420. 0 0
  421. 1 1
  422. 2 2
  423. 3 3
  424. dtype: int64
  425. >>> hours_series = pd.Series(
  426. ... pd.period_range(start="2000-01-01 00:00", end="2000-01-01 03:00", freq="h")
  427. ... )
  428. >>> hours_series
  429. 0 2000-01-01 00:00
  430. 1 2000-01-01 01:00
  431. 2 2000-01-01 02:00
  432. 3 2000-01-01 03:00
  433. dtype: period[H]
  434. >>> hours_series.dt.hour
  435. 0 0
  436. 1 1
  437. 2 2
  438. 3 3
  439. dtype: int64
  440. >>> quarters_series = pd.Series(
  441. ... pd.period_range(start="2000-01-01", end="2000-12-31", freq="Q-DEC")
  442. ... )
  443. >>> quarters_series
  444. 0 2000Q1
  445. 1 2000Q2
  446. 2 2000Q3
  447. 3 2000Q4
  448. dtype: period[Q-DEC]
  449. >>> quarters_series.dt.quarter
  450. 0 1
  451. 1 2
  452. 2 3
  453. 3 4
  454. dtype: int64
  455. """
  456. class CombinedDatetimelikeProperties(
  457. DatetimeProperties, TimedeltaProperties, PeriodProperties
  458. ):
  459. def __new__(cls, data: Series):
  460. # CombinedDatetimelikeProperties isn't really instantiated. Instead
  461. # we need to choose which parent (datetime or timedelta) is
  462. # appropriate. Since we're checking the dtypes anyway, we'll just
  463. # do all the validation here.
  464. if not isinstance(data, ABCSeries):
  465. raise TypeError(
  466. f"cannot convert an object of type {type(data)} to a datetimelike index"
  467. )
  468. orig = data if is_categorical_dtype(data.dtype) else None
  469. if orig is not None:
  470. data = data._constructor(
  471. orig.array,
  472. name=orig.name,
  473. copy=False,
  474. dtype=orig._values.categories.dtype,
  475. index=orig.index,
  476. )
  477. if isinstance(data.dtype, ArrowDtype) and data.dtype.kind == "M":
  478. return ArrowTemporalProperties(data, orig)
  479. if is_datetime64_dtype(data.dtype):
  480. return DatetimeProperties(data, orig)
  481. elif is_datetime64tz_dtype(data.dtype):
  482. return DatetimeProperties(data, orig)
  483. elif is_timedelta64_dtype(data.dtype):
  484. return TimedeltaProperties(data, orig)
  485. elif is_period_dtype(data.dtype):
  486. return PeriodProperties(data, orig)
  487. raise AttributeError("Can only use .dt accessor with datetimelike values")