12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595 |
- from __future__ import annotations
- from datetime import (
- datetime,
- time,
- timedelta,
- tzinfo,
- )
- from typing import (
- TYPE_CHECKING,
- Iterator,
- cast,
- )
- import warnings
- import numpy as np
- from pandas._libs import (
- lib,
- tslib,
- )
- from pandas._libs.tslibs import (
- BaseOffset,
- NaT,
- NaTType,
- Resolution,
- Timestamp,
- astype_overflowsafe,
- fields,
- get_resolution,
- get_supported_reso,
- get_unit_from_dtype,
- ints_to_pydatetime,
- is_date_array_normalized,
- is_supported_unit,
- is_unitless,
- normalize_i8_timestamps,
- npy_unit_to_abbrev,
- timezones,
- to_offset,
- tz_convert_from_utc,
- tzconversion,
- )
- from pandas._libs.tslibs.dtypes import abbrev_to_npy_unit
- from pandas._typing import (
- DateTimeErrorChoices,
- IntervalClosedType,
- TimeAmbiguous,
- TimeNonexistent,
- npt,
- )
- from pandas.errors import PerformanceWarning
- from pandas.util._exceptions import find_stack_level
- from pandas.util._validators import validate_inclusive
- from pandas.core.dtypes.common import (
- DT64NS_DTYPE,
- INT64_DTYPE,
- is_bool_dtype,
- is_datetime64_any_dtype,
- is_datetime64_dtype,
- is_datetime64tz_dtype,
- is_dtype_equal,
- is_extension_array_dtype,
- is_float_dtype,
- is_object_dtype,
- is_period_dtype,
- is_sparse,
- is_string_dtype,
- is_timedelta64_dtype,
- pandas_dtype,
- )
- from pandas.core.dtypes.dtypes import (
- DatetimeTZDtype,
- ExtensionDtype,
- )
- from pandas.core.dtypes.missing import isna
- from pandas.core.arrays import datetimelike as dtl
- from pandas.core.arrays._ranges import generate_regular_range
- import pandas.core.common as com
- from pandas.tseries.frequencies import get_period_alias
- from pandas.tseries.offsets import (
- Day,
- Tick,
- )
- if TYPE_CHECKING:
- from pandas import DataFrame
- from pandas.core.arrays import PeriodArray
- _midnight = time(0, 0)
def tz_to_dtype(tz: tzinfo | None, unit: str = "ns"):
    """
    Build the datetime64 dtype that matches a timezone and a resolution.

    Parameters
    ----------
    tz : tzinfo or None
        Timezone of the data; ``None`` selects a tz-naive NumPy dtype.
    unit : str, default "ns"
        Resolution abbreviation embedded in the resulting dtype.

    Returns
    -------
    np.dtype or DatetimeTZDtype
        ``np.dtype("M8[<unit>]")`` when `tz` is None, otherwise a
        ``DatetimeTZDtype`` built from `tz` and `unit`.
    """
    if tz is not None:
        return DatetimeTZDtype(tz=tz, unit=unit)
    return np.dtype(f"M8[{unit}]")
def _field_accessor(name: str, field: str, docstring=None):
    """
    Create a read-only property that extracts one datetime field.

    Parameters
    ----------
    name : str
        Value assigned to the getter's ``__name__``.
    field : str
        Field code handed to the ``fields`` extraction routines.
    docstring : str, optional
        Value assigned to the getter's ``__doc__``.

    Returns
    -------
    property
    """

    def f(self):
        values = self._local_timestamps()

        if field in self._bool_ops:
            if not field.endswith(("start", "end")):
                # boolean by definition, so no NaT masking is applied
                return fields.get_date_field(values, field, reso=self._creso)

            # *_start / *_end fields depend on the anchoring month of freq
            offset = self.freq
            month_kw = 12
            if offset:
                offset_kwds = offset.kwds
                month_kw = offset_kwds.get(
                    "startingMonth", offset_kwds.get("month", 12)
                )
            return fields.get_start_end_field(
                values, field, self.freqstr, month_kw, reso=self._creso
            )

        if field in self._object_ops:
            raw = fields.get_date_name_field(values, field, reso=self._creso)
            return self._maybe_mask_results(raw, fill_value=None)

        raw = fields.get_date_field(values, field, reso=self._creso)
        return self._maybe_mask_results(raw, fill_value=None, convert="float64")

    f.__name__ = name
    f.__doc__ = docstring
    return property(f)
- class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
- """
- Pandas ExtensionArray for tz-naive or tz-aware datetime data.
- .. warning::
- DatetimeArray is currently experimental, and its API may change
- without warning. In particular, :attr:`DatetimeArray.dtype` is
- expected to change to always be an instance of an ``ExtensionDtype``
- subclass.
- Parameters
- ----------
- values : Series, Index, DatetimeArray, ndarray
- The datetime data.
- For DatetimeArray `values` (or a Series or Index boxing one),
- `dtype` and `freq` will be extracted from `values`.
- dtype : numpy.dtype or DatetimeTZDtype
- Note that the only NumPy dtype allowed is 'datetime64[ns]'.
- freq : str or Offset, optional
- The frequency.
- copy : bool, default False
- Whether to copy the underlying array of values.
- Attributes
- ----------
- None
- Methods
- -------
- None
- """
- _typ = "datetimearray"
- _internal_fill_value = np.datetime64("NaT", "ns")
- _recognized_scalars = (datetime, np.datetime64)
- _is_recognized_dtype = is_datetime64_any_dtype
- _infer_matches = ("datetime", "datetime64", "date")
@property
def _scalar_type(self) -> type[Timestamp]:
    """
    Scalar class associated with this array: always ``Timestamp``.
    """
    return Timestamp
- # define my properties & methods for delegation
- _bool_ops: list[str] = [
- "is_month_start",
- "is_month_end",
- "is_quarter_start",
- "is_quarter_end",
- "is_year_start",
- "is_year_end",
- "is_leap_year",
- ]
- _object_ops: list[str] = ["freq", "tz"]
- _field_ops: list[str] = [
- "year",
- "month",
- "day",
- "hour",
- "minute",
- "second",
- "weekday",
- "dayofweek",
- "day_of_week",
- "dayofyear",
- "day_of_year",
- "quarter",
- "days_in_month",
- "daysinmonth",
- "microsecond",
- "nanosecond",
- ]
- _other_ops: list[str] = ["date", "time", "timetz"]
- _datetimelike_ops: list[str] = (
- _field_ops + _object_ops + _bool_ops + _other_ops + ["unit"]
- )
- _datetimelike_methods: list[str] = [
- "to_period",
- "tz_localize",
- "tz_convert",
- "normalize",
- "strftime",
- "round",
- "floor",
- "ceil",
- "month_name",
- "day_name",
- "as_unit",
- ]
- # ndim is inherited from ExtensionArray, must exist to ensure
- # Timestamp.__richcmp__(DateTimeArray) operates pointwise
- # ensure that operations with numpy arrays defer to our implementation
- __array_priority__ = 1000
- # -----------------------------------------------------------------
- # Constructors
- _dtype: np.dtype | DatetimeTZDtype
- _freq: BaseOffset | None = None
- _default_dtype = DT64NS_DTYPE # used in TimeLikeOps.__init__
@classmethod
def _validate_dtype(cls, values, dtype):
    """
    Validate the dtype of `values` and the requested `dtype`.

    Both are passed through ``_validate_dt64_dtype``; only the result for
    the requested `dtype` is returned. The call on ``values.dtype`` is made
    for its checking effect alone (presumably it raises on an unsupported
    dtype -- confirm against ``_validate_dt64_dtype``).
    """
    # used in TimeLikeOps.__init__
    _validate_dt64_dtype(values.dtype)
    return _validate_dt64_dtype(dtype)
# error: Signature of "_simple_new" incompatible with supertype "NDArrayBacked"
@classmethod
def _simple_new(  # type: ignore[override]
    cls,
    values: np.ndarray,
    freq: BaseOffset | None = None,
    dtype=DT64NS_DTYPE,
) -> DatetimeArray:
    """
    Wrap an already-validated datetime64 ndarray without conversion.

    Parameters
    ----------
    values : np.ndarray
        Backing array; its dtype must agree with `dtype` (checked by
        assertions only).
    freq : BaseOffset or None, default None
        Stored on the result as ``_freq`` without validation.
    dtype : np.dtype or DatetimeTZDtype, default DT64NS_DTYPE
        Dtype of the resulting array.

    Returns
    -------
    DatetimeArray
    """
    assert isinstance(values, np.ndarray)
    assert dtype.kind == "M"

    if not isinstance(dtype, np.dtype):
        # DatetimeTZDtype.  If we have e.g. DatetimeTZDtype[us, UTC],
        # then values.dtype should be M8[us].
        assert dtype._creso == get_unit_from_dtype(values.dtype)
    else:
        assert dtype == values.dtype
        assert not is_unitless(dtype)

    new_array = super()._simple_new(values, dtype)
    new_array._freq = freq
    return new_array
@classmethod
def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False):
    """
    Construct an array from a sequence of scalars.

    Forwards `scalars`, `dtype` and `copy` unchanged to
    ``cls._from_sequence_not_strict`` and returns its result.
    """
    return cls._from_sequence_not_strict(scalars, dtype=dtype, copy=copy)
@classmethod
def _from_sequence_not_strict(
    cls,
    data,
    *,
    dtype=None,
    copy: bool = False,
    tz=lib.no_default,
    freq: str | BaseOffset | lib.NoDefault | None = lib.no_default,
    dayfirst: bool = False,
    yearfirst: bool = False,
    ambiguous: TimeAmbiguous = "raise",
):
    """
    A non-strict version of _from_sequence, called from DatetimeIndex.__new__.

    Converts `data` with ``_sequence_to_dt64ns``, reconciles the timezone
    coming from `tz`, `dtype` and the data itself, casts to the unit named
    in `dtype` (if any) and finally validates or infers `freq`.

    Raises
    ------
    ValueError
        If ``tz=None`` was passed explicitly but the data turned out to be
        timezone-aware.
    """
    # Remember whether the caller wrote ``freq=None``; that request wins
    # over anything validated or inferred further down.
    freq_explicitly_none = freq is None
    if freq is lib.no_default:
        freq = None
    freq, freq_infer = dtl.maybe_infer_freq(freq)

    # if the user either explicitly passes tz=None or a tz-naive dtype, we
    # disallow inferring a tz.
    tz_explicitly_none = tz is None
    tz = None if tz is lib.no_default else timezones.maybe_get_tz(tz)

    dtype = _validate_dt64_dtype(dtype)
    # if dtype has an embedded tz, capture it
    tz = _validate_tz_from_dtype(dtype, tz, tz_explicitly_none)

    if dtype is None:
        unit = None
    elif isinstance(dtype, np.dtype):
        unit = np.datetime_data(dtype)[0]
    else:
        # DatetimeTZDtype
        unit = dtype.unit

    subarr, tz, inferred_freq = _sequence_to_dt64ns(
        data,
        copy=copy,
        tz=tz,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
        ambiguous=ambiguous,
        out_unit=unit,
    )
    # We have to call this again after possibly inferring a tz above
    _validate_tz_from_dtype(dtype, tz, tz_explicitly_none)
    if tz_explicitly_none and tz is not None:
        raise ValueError(
            "Passed data is timezone-aware, incompatible with 'tz=None'. "
            "Use obj.tz_localize(None) instead."
        )

    freq, freq_infer = dtl.validate_inferred_freq(freq, inferred_freq, freq_infer)
    if freq_explicitly_none:
        freq = None

    # Wrap using the unit the conversion actually produced ...
    produced_unit = np.datetime_data(subarr.dtype)[0]
    result = cls._simple_new(
        subarr, freq=freq, dtype=tz_to_dtype(tz, produced_unit)
    )
    # ... then cast if the user-passed dtype asked for a different unit.
    if unit is not None and unit != result.unit:
        result = result.as_unit(unit)

    if inferred_freq is None and freq is not None:
        # this condition precludes `freq_infer`
        cls._validate_frequency(result, freq, ambiguous=ambiguous)
    elif freq_infer:
        # Set _freq directly to bypass duplicative _validate_frequency
        # check.
        result._freq = to_offset(result.inferred_freq)

    return result
# error: Signature of "_generate_range" incompatible with supertype
# "DatetimeLikeArrayMixin"
@classmethod
def _generate_range(  # type: ignore[override]
    cls,
    start,
    end,
    periods,
    freq,
    tz=None,
    normalize: bool = False,
    ambiguous: TimeAmbiguous = "raise",
    nonexistent: TimeNonexistent = "raise",
    inclusive: IntervalClosedType = "both",
    *,
    unit: str | None = None,
) -> DatetimeArray:
    """
    Build a DatetimeArray of regularly spaced values.

    Exactly three of `start`, `end`, `periods` and `freq` must be given.

    Parameters
    ----------
    start, end : Timestamp-convertible or None
        Endpoints; each non-None value is passed through ``Timestamp``.
    periods : int or None
        Number of points; validated by ``dtl.validate_periods``.
    freq : str, offset or None
        Step; converted with ``to_offset``. When None, the values are
        spaced linearly between `start` and `end`.
    tz : optional
        Timezone, reconciled with the endpoints by
        ``_infer_tz_from_endpoints``.
    normalize : bool, default False
        Forwarded to ``_maybe_normalize_endpoints``.
    ambiguous, nonexistent
        Forwarded to the localization routines.
    inclusive : str, default "both"
        Which endpoints to keep; parsed by ``validate_inclusive``.
    unit : {"s", "ms", "us", "ns"} or None
        Resolution of the result; None falls back to "ns".

    Returns
    -------
    DatetimeArray

    Raises
    ------
    ValueError
        If too few or too many of the four range parameters are given,
        if either endpoint is NaT, or if `unit` is not supported.
    """
    periods = dtl.validate_periods(periods)
    if freq is None and any(x is None for x in [periods, start, end]):
        raise ValueError("Must provide freq argument if no data is supplied")

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, "
            "and freq, exactly three must be specified"
        )
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    if start is NaT or end is NaT:
        raise ValueError("Neither `start` nor `end` can be NaT")

    if unit is not None:
        if unit not in ["s", "ms", "us", "ns"]:
            raise ValueError("'unit' must be one of 's', 'ms', 'us', 'ns'")
    else:
        unit = "ns"

    # `unit` is always set by this point; round_ok=False makes the cast
    # refuse to silently round the endpoints.
    if start is not None and unit is not None:
        start = start.as_unit(unit, round_ok=False)
    if end is not None and unit is not None:
        end = end.as_unit(unit, round_ok=False)

    left_inclusive, right_inclusive = validate_inclusive(inclusive)
    start, end = _maybe_normalize_endpoints(start, end, normalize)
    tz = _infer_tz_from_endpoints(start, end, tz)

    if tz is not None:
        # Localize the start and end arguments
        start_tz = None if start is None else start.tz
        end_tz = None if end is None else end.tz
        start = _maybe_localize_point(
            start, start_tz, start, freq, tz, ambiguous, nonexistent
        )
        end = _maybe_localize_point(
            end, end_tz, end, freq, tz, ambiguous, nonexistent
        )

    if freq is not None:
        # We break Day arithmetic (fixed 24 hour) here and opt for
        # Day to mean calendar day (23/24/25 hour). Therefore, strip
        # tz info from start and day to avoid DST arithmetic
        if isinstance(freq, Day):
            if start is not None:
                start = start.tz_localize(None)
            if end is not None:
                end = end.tz_localize(None)

        if isinstance(freq, Tick):
            # Fixed-width steps use the dedicated regular-range generator
            i8values = generate_regular_range(start, end, periods, freq, unit=unit)
        else:
            # Other offsets are stepped one scalar at a time
            xdr = _generate_range(
                start=start, end=end, periods=periods, offset=freq, unit=unit
            )
            i8values = np.array([x._value for x in xdr], dtype=np.int64)

        # At most one endpoint can be None here, because freq is one of
        # the three supplied parameters.
        endpoint_tz = start.tz if start is not None else end.tz

        if tz is not None and endpoint_tz is None:

            if not timezones.is_utc(tz):
                # short-circuit tz_localize_to_utc which would make
                # an unnecessary copy with UTC but be a no-op.
                creso = abbrev_to_npy_unit(unit)
                i8values = tzconversion.tz_localize_to_utc(
                    i8values,
                    tz,
                    ambiguous=ambiguous,
                    nonexistent=nonexistent,
                    creso=creso,
                )

            # i8values is localized datetime64 array -> have to convert
            # start/end as well to compare
            if start is not None:
                start = start.tz_localize(tz, ambiguous, nonexistent)
            if end is not None:
                end = end.tz_localize(tz, ambiguous, nonexistent)
    else:
        # Create a linearly spaced date_range in local time
        # Nanosecond-granularity timestamps aren't always correctly
        # representable with doubles, so we limit the range that we
        # pass to np.linspace as much as possible
        i8values = (
            np.linspace(0, end._value - start._value, periods, dtype="int64")
            + start._value
        )
        if i8values.dtype != "i8":
            # 2022-01-09 I (brock) am not sure if it is possible for this
            # to overflow and cast to e.g. f8, but if it does we need to cast
            i8values = i8values.astype("i8")

    if start == end:
        # Degenerate range: only a fully open interval trims anything
        if not left_inclusive and not right_inclusive:
            i8values = i8values[1:-1]
    else:
        start_i8 = Timestamp(start)._value
        end_i8 = Timestamp(end)._value
        if not left_inclusive or not right_inclusive:
            # Drop an excluded endpoint only if it was actually generated
            if not left_inclusive and len(i8values) and i8values[0] == start_i8:
                i8values = i8values[1:]
            if not right_inclusive and len(i8values) and i8values[-1] == end_i8:
                i8values = i8values[:-1]

    dt64_values = i8values.view(f"datetime64[{unit}]")
    dtype = tz_to_dtype(tz, unit=unit)
    return cls._simple_new(dt64_values, freq=freq, dtype=dtype)
- # -----------------------------------------------------------------
- # DatetimeLike Interface
def _unbox_scalar(self, value) -> np.datetime64:
    """
    Convert a Timestamp or NaT into a ``np.datetime64`` in this array's unit.

    Raises
    ------
    ValueError
        If `value` is neither NaT nor an instance of ``self._scalar_type``.
    """
    if value is not NaT and not isinstance(value, self._scalar_type):
        raise ValueError("'value' should be a Timestamp.")
    self._check_compatible_with(value)

    if value is not NaT:
        return value.as_unit(self.unit).asm8
    # NaT has no unit of its own, so build the scalar from its raw value
    return np.datetime64(value._value, self.unit)
def _scalar_from_string(self, value) -> Timestamp | NaTType:
    """
    Parse `value` into a scalar via ``Timestamp(value, tz=self.tz)``.
    """
    return Timestamp(value, tz=self.tz)
def _check_compatible_with(self, other) -> None:
    """
    Check that `other` agrees with this array on timezone-awareness.

    NaT is always accepted; anything else is delegated to
    ``self._assert_tzawareness_compat``.
    """
    if other is not NaT:
        self._assert_tzawareness_compat(other)
- # -----------------------------------------------------------------
- # Descriptive Properties
def _box_func(self, x: np.datetime64) -> Timestamp | NaTType:
    """
    Box one ``np.datetime64`` element into a scalar that carries this
    array's resolution and timezone.
    """
    # GH#42228
    return Timestamp._from_value_and_reso(
        x.view("i8"), reso=self._creso, tz=self.tz
    )
@property
# error: Return type "Union[dtype, DatetimeTZDtype]" of "dtype"
# incompatible with return type "ExtensionDtype" in supertype
# "ExtensionArray"
def dtype(self) -> np.dtype | DatetimeTZDtype:  # type: ignore[override]
    """
    The dtype for the DatetimeArray.

    .. warning::

       A future version of pandas will change dtype to never be a
       ``numpy.dtype``. Instead, :attr:`DatetimeArray.dtype` will
       always be an instance of an ``ExtensionDtype`` subclass.

    Returns
    -------
    numpy.dtype or DatetimeTZDtype
        If the values are tz-naive, then a NumPy ``datetime64`` dtype
        (e.g. ``np.dtype('datetime64[ns]')``) is returned.

        If the values are tz-aware, then the ``DatetimeTZDtype``
        is returned.
    """
    # Plain accessor for the stored ``_dtype`` attribute
    return self._dtype
@property
def tz(self) -> tzinfo | None:
    """
    Return the timezone.

    Returns
    -------
    datetime.tzinfo, pytz.tzinfo.BaseTZInfo, dateutil.tz.tz.tzfile, or None
        Returns None when the array is tz-naive.
    """
    # GH 18595
    # A plain NumPy dtype has no ``tz`` attribute, hence the None default
    current_dtype = self.dtype
    return getattr(current_dtype, "tz", None)


@tz.setter
def tz(self, value):
    """
    Reject assignment: the timezone may not be set directly.
    """
    # GH 3746: Prevent localizing or converting the index by setting tz
    message = (
        "Cannot directly set timezone. Use tz_localize() "
        "or tz_convert() as appropriate"
    )
    raise AttributeError(message)
@property
def tzinfo(self) -> tzinfo | None:
    """
    Alias for tz attribute.

    Returns whatever ``self.tz`` returns.
    """
    return self.tz
@property  # NB: override with cache_readonly in immutable subclasses
def is_normalized(self) -> bool:
    """
    Returns True if all of the dates are at midnight ("no time")
    """
    i8values = self.asi8
    all_midnight = is_date_array_normalized(i8values, self.tz, reso=self._creso)
    return all_midnight
@property  # NB: override with cache_readonly in immutable subclasses
def _resolution_obj(self) -> Resolution:
    """
    Resolution reported by ``get_resolution`` for this array's values,
    timezone and unit.
    """
    i8values = self.asi8
    return get_resolution(i8values, self.tz, reso=self._creso)
- # ----------------------------------------------------------------
- # Array-Like / EA-Interface Methods
def __array__(self, dtype=None) -> np.ndarray:
    """
    Convert to a NumPy array.

    When no `dtype` is requested and the array has a timezone, ``object``
    is used so the timezone information is preserved.
    """
    no_dtype_requested = dtype is None
    if no_dtype_requested and self.tz:
        # The default for tz-aware is object, to preserve tz info
        dtype = object

    return super().__array__(dtype=dtype)
def __iter__(self) -> Iterator:
    """
    Return an iterator over the boxed values

    Yields
    ------
    tstamp : Timestamp
    """
    if self.ndim > 1:
        # Indexing (not iteration) is required here: each ``self[i]`` is
        # produced by ``__getitem__``, avoiding recursion into __iter__.
        for i in range(len(self)):
            yield self[i]
        return

    # convert in chunks of 10k for efficiency
    data = self.asi8
    tz = self.tz
    creso = self._creso
    chunksize = 10000
    # Stepping by chunksize visits exactly the non-empty chunks; the
    # former ``length // chunksize + 1`` count added a trailing empty
    # chunk for empty arrays and exact multiples of chunksize.
    for start_i in range(0, len(self), chunksize):
        yield from ints_to_pydatetime(
            data[start_i : start_i + chunksize],
            tz=tz,
            box="timestamp",
            reso=creso,
        )
def astype(self, dtype, copy: bool = True):
    """
    Cast to `dtype`.

    Parameters
    ----------
    dtype : str, np.dtype or ExtensionDtype
        Target dtype; normalized with ``pandas_dtype``.
    copy : bool, default True
        Whether to return a copy when `dtype` already equals the current
        dtype; also forwarded to the conversions that accept it.

    Raises
    ------
    TypeError
        When converting between tz-naive and tz-aware dtypes in either
        direction, or when casting a tz-naive array to unit-less
        ``datetime64``.
    """
    # We handle
    # --> datetime
    # --> period
    # DatetimeLikeArrayMixin Super handles the rest.
    dtype = pandas_dtype(dtype)

    if is_dtype_equal(dtype, self.dtype):
        if copy:
            return self.copy()
        return self

    elif isinstance(dtype, ExtensionDtype):
        if not isinstance(dtype, DatetimeTZDtype):
            # e.g. Sparse[datetime64[ns]]
            return super().astype(dtype, copy=copy)
        elif self.tz is None:
            # pre-2.0 this did self.tz_localize(dtype.tz), which did not match
            # the Series behavior which did
            # values.tz_localize("UTC").tz_convert(dtype.tz)
            raise TypeError(
                "Cannot use .astype to convert from timezone-naive dtype to "
                "timezone-aware dtype. Use obj.tz_localize instead or "
                "series.dt.tz_localize instead"
            )
        else:
            # tzaware unit conversion e.g. datetime64[s, UTC]
            np_dtype = np.dtype(dtype.str)
            res_values = astype_overflowsafe(self._ndarray, np_dtype, copy=copy)
            return type(self)._simple_new(res_values, dtype=dtype, freq=self.freq)

    elif (
        self.tz is None
        and is_datetime64_dtype(dtype)
        and not is_unitless(dtype)
        and is_supported_unit(get_unit_from_dtype(dtype))
    ):
        # unit conversion e.g. datetime64[s]
        # NOTE: `copy` is not consulted on this path; copy=True is hard-coded.
        res_values = astype_overflowsafe(self._ndarray, dtype, copy=True)
        return type(self)._simple_new(res_values, dtype=res_values.dtype)
        # TODO: preserve freq?

    elif self.tz is not None and is_datetime64_dtype(dtype):
        # pre-2.0 behavior for DTA/DTI was
        # values.tz_convert("UTC").tz_localize(None), which did not match
        # the Series behavior
        raise TypeError(
            "Cannot use .astype to convert from timezone-aware dtype to "
            "timezone-naive dtype. Use obj.tz_localize(None) or "
            "obj.tz_convert('UTC').tz_localize(None) instead."
        )

    elif (
        self.tz is None
        and is_datetime64_dtype(dtype)
        and dtype != self.dtype
        and is_unitless(dtype)
    ):
        raise TypeError(
            "Casting to unit-less dtype 'datetime64' is not supported. "
            "Pass e.g. 'datetime64[ns]' instead."
        )

    elif is_period_dtype(dtype):
        return self.to_period(freq=dtype.freq)
    # Everything not handled above falls through to the shared mixin
    return dtl.DatetimeLikeArrayMixin.astype(self, dtype, copy)
- # -----------------------------------------------------------------
- # Rendering Methods
def _format_native_types(
    self, *, na_rep: str | float = "NaT", date_format=None, **kwargs
) -> npt.NDArray[np.object_]:
    """
    Render the values through ``tslib.format_array_from_datetime``.

    Parameters
    ----------
    na_rep : str or float, default "NaT"
        Passed through as the formatter's ``na_rep``.
    date_format : optional
        Resolved together with the array itself by
        ``get_format_datetime64_from_values`` into the format used.
    **kwargs
        Accepted but not used here.
    """
    from pandas.io.formats.format import get_format_datetime64_from_values

    resolved_format = get_format_datetime64_from_values(self, date_format)

    formatted = tslib.format_array_from_datetime(
        self.asi8,
        tz=self.tz,
        format=resolved_format,
        na_rep=na_rep,
        reso=self._creso,
    )
    return formatted
- # -----------------------------------------------------------------
- # Comparison Methods
def _has_same_tz(self, other) -> bool:
    """
    Report whether `other` has the same timezone as this array.

    Objects without a ``tzinfo`` attribute never match.
    """
    # vzone shouldn't be None if value is non-datetime like
    if isinstance(other, np.datetime64):
        # convert to Timestamp as np.datetime64 doesn't have tz attr
        other = Timestamp(other)

    if hasattr(other, "tzinfo"):
        their_tz = other.tzinfo
        return timezones.tz_compare(self.tzinfo, their_tz)
    return False
def _assert_tzawareness_compat(self, other) -> None:
    """
    Raise if `other` and this array disagree on timezone-awareness.

    Raises
    ------
    TypeError
        If exactly one of the two is timezone-aware. NaT never raises.
    """
    # adapted from _Timestamp._assert_tzawareness_compat
    other_tz = getattr(other, "tzinfo", None)
    if is_datetime64tz_dtype(getattr(other, "dtype", None)):
        # Get tzinfo from Series dtype
        other_tz = other.dtype.tz

    if other is NaT:
        # pd.NaT quacks both aware and naive
        return

    if self.tz is None:
        if other_tz is not None:
            raise TypeError(
                "Cannot compare tz-naive and tz-aware datetime-like objects."
            )
        return

    if other_tz is None:
        raise TypeError(
            "Cannot compare tz-naive and tz-aware datetime-like objects"
        )
- # -----------------------------------------------------------------
- # Arithmetic Methods
def _add_offset(self, offset) -> DatetimeArray:
    """
    Add a non-Tick DateOffset to every element.

    The offset's array implementation is tried first on tz-naive values.
    If it raises NotImplementedError, a PerformanceWarning is emitted and
    the offset is added element-wise through an object-dtype cast. The
    original timezone, if any, is re-applied to the result.
    """
    assert not isinstance(offset, Tick)

    # Apply the offset on wall times; the timezone is restored afterwards
    naive_values = self if self.tz is None else self.tz_localize(None)

    try:
        shifted = offset._apply_array(naive_values).view(naive_values.dtype)
    except NotImplementedError:
        warnings.warn(
            "Non-vectorized DateOffset being applied to Series or DatetimeIndex.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )
        boxed_sum = self.astype("O") + offset
        result = type(self)._from_sequence(boxed_sum).as_unit(self.unit)
        if len(self) == 0:
            # GH#30336 _from_sequence won't be able to infer self.tz
            return result.tz_localize(self.tz)
        return result

    result = DatetimeArray._simple_new(shifted, dtype=shifted.dtype)
    if self.tz is not None:
        result = result.tz_localize(self.tz)
    return result
- # -----------------------------------------------------------------
- # Timezone Conversion and Localization Methods
def _local_timestamps(self) -> npt.NDArray[np.int64]:
    """
    Return the i8 representation of the values expressed in local time.

    Keeps the local timezone's wall times instead of UTC, so that
    time-of-day information can be computed as if the timestamps were
    timezone-naive.
    """
    local_tz = self.tz
    needs_conversion = not (local_tz is None or timezones.is_utc(local_tz))
    if needs_conversion:
        return tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
    # Avoid the copy that would be made in tzconversion
    return self.asi8
def tz_convert(self, tz) -> DatetimeArray:
    """
    Convert tz-aware Datetime Array/Index from one time zone to another.

    Parameters
    ----------
    tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
        Time zone for time. Corresponding timestamps would be converted
        to this time zone of the Datetime Array/Index. A `tz` of None will
        convert to UTC and remove the timezone information.

    Returns
    -------
    Array or Index

    Raises
    ------
    TypeError
        If Datetime Array/Index is tz-naive.

    See Also
    --------
    DatetimeIndex.tz : A timezone that has a variable offset from UTC.
    DatetimeIndex.tz_localize : Localize tz-naive DatetimeIndex to a
        given time zone, or remove timezone from a tz-aware DatetimeIndex.

    Examples
    --------
    With the `tz` parameter, we can change the DatetimeIndex
    to other time zones:

    >>> dti = pd.date_range(start='2014-08-01 09:00',
    ...                     freq='H', periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='H')

    >>> dti.tz_convert('US/Central')
    DatetimeIndex(['2014-08-01 02:00:00-05:00',
                   '2014-08-01 03:00:00-05:00',
                   '2014-08-01 04:00:00-05:00'],
                  dtype='datetime64[ns, US/Central]', freq='H')

    With the ``tz=None``, we can remove the timezone (after converting
    to UTC if necessary):

    >>> dti = pd.date_range(start='2014-08-01 09:00', freq='H',
    ...                     periods=3, tz='Europe/Berlin')

    >>> dti
    DatetimeIndex(['2014-08-01 09:00:00+02:00',
                   '2014-08-01 10:00:00+02:00',
                   '2014-08-01 11:00:00+02:00'],
                  dtype='datetime64[ns, Europe/Berlin]', freq='H')

    >>> dti.tz_convert(None)
    DatetimeIndex(['2014-08-01 07:00:00',
                   '2014-08-01 08:00:00',
                   '2014-08-01 09:00:00'],
                  dtype='datetime64[ns]', freq='H')
    """
    tz = timezones.maybe_get_tz(tz)

    if self.tz is None:
        # tz naive, use tz_localize
        raise TypeError(
            "Cannot convert tz-naive timestamps, use tz_localize to localize"
        )

    # No conversion since timestamps are all UTC to begin with: the same
    # backing array is re-wrapped under a dtype naming the new timezone.
    target_dtype = tz_to_dtype(tz, unit=self.unit)
    return self._simple_new(self._ndarray, dtype=target_dtype, freq=self.freq)
@dtl.ravel_compat
def tz_localize(
    self,
    tz,
    ambiguous: TimeAmbiguous = "raise",
    nonexistent: TimeNonexistent = "raise",
) -> DatetimeArray:
    """
    Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index.

    A time zone (tz) naive Datetime Array/Index is made time zone aware.
    The wall time is kept; it is not moved to another time zone.

    The inverse is also supported: pass `tz=None` to turn a time zone
    aware object into a time zone unaware one.

    Parameters
    ----------
    tz : str, pytz.timezone, dateutil.tz.tzfile, datetime.tzinfo or None
        Time zone to convert timestamps to. Passing ``None`` will
        remove the time zone information preserving local time.
    ambiguous : 'infer', 'NaT', bool array, default 'raise'
        When clocks moved backward due to DST, ambiguous times may arise.
        For example in Central European Time (UTC+01), when going from
        03:00 DST to 02:00 non-DST, 02:30:00 local time occurs both at
        00:30:00 UTC and at 01:30:00 UTC. In such a situation, the
        `ambiguous` parameter dictates how ambiguous times should be
        handled.

        - 'infer' will attempt to infer fall dst-transition hours based on
          order
        - bool-ndarray where True signifies a DST time, False signifies a
          non-DST time (note that this flag is only applicable for
          ambiguous times)
        - 'NaT' will return NaT where there are ambiguous times
        - 'raise' will raise an AmbiguousTimeError if there are ambiguous
          times.

    nonexistent : 'shift_forward', 'shift_backward', 'NaT', timedelta, \
default 'raise'
        A nonexistent time does not exist in a particular timezone
        where clocks moved forward due to DST.

        - 'shift_forward' will shift the nonexistent time forward to the
          closest existing time
        - 'shift_backward' will shift the nonexistent time backward to the
          closest existing time
        - 'NaT' will return NaT where there are nonexistent times
        - timedelta objects will shift nonexistent times by the timedelta
        - 'raise' will raise a NonExistentTimeError if there are
          nonexistent times.

    Returns
    -------
    Same type as self
        Array/Index converted to the specified time zone.

    Raises
    ------
    TypeError
        If the Datetime Array/Index is tz-aware and tz is not None.
    ValueError
        If `nonexistent` is neither one of the accepted strings nor a
        timedelta object.

    See Also
    --------
    DatetimeIndex.tz_convert : Convert tz-aware DatetimeIndex from
        one time zone to another.

    Examples
    --------
    >>> tz_naive = pd.date_range('2018-03-01 09:00', periods=3)
    >>> tz_naive
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq='D')

    Localize DatetimeIndex in US/Eastern time zone:

    >>> tz_aware = tz_naive.tz_localize(tz='US/Eastern')
    >>> tz_aware
    DatetimeIndex(['2018-03-01 09:00:00-05:00',
                   '2018-03-02 09:00:00-05:00',
                   '2018-03-03 09:00:00-05:00'],
                  dtype='datetime64[ns, US/Eastern]', freq=None)

    With the ``tz=None``, we can remove the time zone information
    while keeping the local time (not converted to UTC):

    >>> tz_aware.tz_localize(None)
    DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00',
                   '2018-03-03 09:00:00'],
                  dtype='datetime64[ns]', freq=None)

    Be careful with DST changes. When there is sequential data, pandas can
    infer the DST time:

    >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:30:00',
    ...                               '2018-10-28 02:00:00',
    ...                               '2018-10-28 02:30:00',
    ...                               '2018-10-28 02:00:00',
    ...                               '2018-10-28 02:30:00',
    ...                               '2018-10-28 03:00:00',
    ...                               '2018-10-28 03:30:00']))
    >>> s.dt.tz_localize('CET', ambiguous='infer')
    0   2018-10-28 01:30:00+02:00
    1   2018-10-28 02:00:00+02:00
    2   2018-10-28 02:30:00+02:00
    3   2018-10-28 02:00:00+01:00
    4   2018-10-28 02:30:00+01:00
    5   2018-10-28 03:00:00+01:00
    6   2018-10-28 03:30:00+01:00
    dtype: datetime64[ns, CET]

    In some cases, inferring the DST is impossible. In such cases, you can
    pass an ndarray to the ambiguous parameter to set the DST explicitly

    >>> s = pd.to_datetime(pd.Series(['2018-10-28 01:20:00',
    ...                               '2018-10-28 02:36:00',
    ...                               '2018-10-28 03:46:00']))
    >>> s.dt.tz_localize('CET', ambiguous=np.array([True, True, False]))
    0   2018-10-28 01:20:00+02:00
    1   2018-10-28 02:36:00+02:00
    2   2018-10-28 03:46:00+01:00
    dtype: datetime64[ns, CET]

    If the DST transition causes nonexistent times, you can shift these
    dates forward or backwards with a timedelta object or `'shift_forward'`
    or `'shift_backward'`.

    >>> s = pd.to_datetime(pd.Series(['2015-03-29 02:30:00',
    ...                               '2015-03-29 03:30:00']))
    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_forward')
    0   2015-03-29 03:00:00+02:00
    1   2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]

    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent='shift_backward')
    0   2015-03-29 01:59:59.999999999+01:00
    1             2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]

    >>> s.dt.tz_localize('Europe/Warsaw', nonexistent=pd.Timedelta('1H'))
    0   2015-03-29 03:30:00+02:00
    1   2015-03-29 03:30:00+02:00
    dtype: datetime64[ns, Europe/Warsaw]
    """
    # Validate `nonexistent` before anything else so a bad option is
    # reported even when the array is already tz-aware.
    accepted_keywords = ("raise", "NaT", "shift_forward", "shift_backward")
    is_accepted = nonexistent in accepted_keywords or isinstance(
        nonexistent, timedelta
    )
    if not is_accepted:
        raise ValueError(
            "The nonexistent argument must be one of 'raise', "
            "'NaT', 'shift_forward', 'shift_backward' or "
            "a timedelta object"
        )

    if self.tz is None:
        tz = timezones.maybe_get_tz(tz)
        # Naive wall times -> UTC-based i8 values
        new_dates = tzconversion.tz_localize_to_utc(
            self.asi8,
            tz,
            ambiguous=ambiguous,
            nonexistent=nonexistent,
            creso=self._creso,
        )
    elif tz is None:
        # Dropping the timezone: keep the local wall times
        new_dates = tz_convert_from_utc(self.asi8, self.tz, reso=self._creso)
    else:
        raise TypeError("Already tz-aware, use tz_convert to convert.")

    new_dates = new_dates.view(f"M8[{self.unit}]")
    new_dtype = tz_to_dtype(tz, unit=self.unit)

    # freq survives for UTC, for a single non-NaT element, and for the
    # naive -> naive no-op.
    # TODO: Also for fixed-offsets
    keep_freq = (
        timezones.is_utc(tz)
        or (len(self) == 1 and not isna(new_dates[0]))
        or (tz is None and self.tz is None)
    )
    new_freq = self.freq if keep_freq else None
    return self._simple_new(new_dates, dtype=new_dtype, freq=new_freq)
- # ----------------------------------------------------------------
- # Conversion Methods - Vectorized analogues of Timestamp methods
def to_pydatetime(self) -> npt.NDArray[np.object_]:
    """
    Return an ndarray of datetime.datetime objects.

    Returns
    -------
    numpy.ndarray
    """
    i8_values = self.asi8
    return ints_to_pydatetime(i8_values, tz=self.tz, reso=self._creso)
def normalize(self) -> DatetimeArray:
    """
    Convert times to midnight.

    The time component of each date-time is set to midnight, i.e.
    00:00:00, which is useful when the time of day does not matter.
    Length is unaltered. The timezones are unaffected.

    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on Datetime Array/Index.

    Returns
    -------
    DatetimeArray, DatetimeIndex or Series
        The same type as the original data. Series will have the same
        name and index. DatetimeIndex will have the same name.

    See Also
    --------
    floor : Floor the datetimes to the specified freq.
    ceil : Ceil the datetimes to the specified freq.
    round : Round the datetimes to the specified freq.

    Examples
    --------
    >>> idx = pd.date_range(start='2014-08-01 10:00', freq='H',
    ...                     periods=3, tz='Asia/Calcutta')
    >>> idx
    DatetimeIndex(['2014-08-01 10:00:00+05:30',
                   '2014-08-01 11:00:00+05:30',
                   '2014-08-01 12:00:00+05:30'],
                    dtype='datetime64[ns, Asia/Calcutta]', freq='H')
    >>> idx.normalize()
    DatetimeIndex(['2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30',
                   '2014-08-01 00:00:00+05:30'],
                   dtype='datetime64[ns, Asia/Calcutta]', freq=None)
    """
    midnight_i8 = normalize_i8_timestamps(self.asi8, self.tz, reso=self._creso)
    midnight_dt64 = midnight_i8.view(self._ndarray.dtype)

    # Build a naive array first, infer its freq, then re-attach the
    # timezone if there was one.
    naive = type(self)._simple_new(midnight_dt64, dtype=midnight_dt64.dtype)
    naive = naive._with_freq("infer")
    if self.tz is None:
        return naive
    return naive.tz_localize(self.tz)
def to_period(self, freq=None) -> PeriodArray:
    """
    Cast to PeriodArray/Index at a particular frequency.

    Converts DatetimeArray/Index to PeriodArray/Index.

    Parameters
    ----------
    freq : str or Offset, optional
        One of pandas' :ref:`offset strings <timeseries.offset_aliases>`
        or an Offset object. Will be inferred by default.

    Returns
    -------
    PeriodArray/Index

    Raises
    ------
    ValueError
        When converting a DatetimeArray/Index with non-regular values,
        so that a frequency cannot be inferred.

    Warns
    -----
    UserWarning
        If the data is tz-aware.

    See Also
    --------
    PeriodIndex: Immutable ndarray holding ordinal values.
    DatetimeIndex.to_pydatetime: Return DatetimeIndex as object.

    Examples
    --------
    >>> df = pd.DataFrame({"y": [1, 2, 3]},
    ...                   index=pd.to_datetime(["2000-03-31 00:00:00",
    ...                                         "2000-05-31 00:00:00",
    ...                                         "2000-08-31 00:00:00"]))
    >>> df.index.to_period("M")
    PeriodIndex(['2000-03', '2000-05', '2000-08'],
                dtype='period[M]')

    Infer the daily frequency

    >>> idx = pd.date_range("2017-01-01", periods=2)
    >>> idx.to_period()
    PeriodIndex(['2017-01-01', '2017-01-02'],
                dtype='period[D]')
    """
    from pandas.core.arrays import PeriodArray

    if self.tz is not None:
        warnings.warn(
            "Converting to PeriodArray/Index representation "
            "will drop timezone information.",
            UserWarning,
            stacklevel=find_stack_level(),
        )

    if freq is None:
        # Fall back on the array's own freq, then on an inferred one
        freq = self.freqstr or self.inferred_freq
        if freq is None:
            raise ValueError(
                "You must pass a freq argument as current index has none."
            )

        # https://github.com/pandas-dev/pandas/issues/33358
        # Keep the freq unchanged when no period alias is returned for it.
        alias = get_period_alias(freq)
        freq = freq if alias is None else alias

    return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz)
- # -----------------------------------------------------------------
- # Properties - Vectorized Timestamp Properties/Methods
def month_name(self, locale=None) -> npt.NDArray[np.object_]:
    """
    Return the month names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the month name.
        Default is English locale (``'en_US.utf8'``). Use the command
        ``locale -a`` on your terminal on Unix systems to find your locale
        language code.

    Returns
    -------
    Series or Index
        Series or Index of month names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01', freq='M', periods=3))
    >>> s
    0   2018-01-31
    1   2018-02-28
    2   2018-03-31
    dtype: datetime64[ns]
    >>> s.dt.month_name()
    0     January
    1    February
    2       March
    dtype: object

    >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
                  dtype='datetime64[ns]', freq='M')
    >>> idx.month_name()
    Index(['January', 'February', 'March'], dtype='object')

    Using the ``locale`` parameter you can set a different locale language,
    for example: ``idx.month_name(locale='pt_BR.utf8')`` will return month
    names in Brazilian Portuguese language.

    >>> idx = pd.date_range(start='2018-01', freq='M', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-31', '2018-02-28', '2018-03-31'],
                  dtype='datetime64[ns]', freq='M')
    >>> idx.month_name(locale='pt_BR.utf8') # doctest: +SKIP
    Index(['Janeiro', 'Fevereiro', 'Março'], dtype='object')
    """
    # Names are looked up on wall-clock values, not on the UTC ones
    local_i8 = self._local_timestamps()
    names = fields.get_date_name_field(
        local_i8, "month_name", locale=locale, reso=self._creso
    )
    return self._maybe_mask_results(names, fill_value=None)
def day_name(self, locale=None) -> npt.NDArray[np.object_]:
    """
    Return the day names with specified locale.

    Parameters
    ----------
    locale : str, optional
        Locale determining the language in which to return the day name.
        Default is English locale (``'en_US.utf8'``). Use the command
        ``locale -a`` on your terminal on Unix systems to find your locale
        language code.

    Returns
    -------
    Series or Index
        Series or Index of day names.

    Examples
    --------
    >>> s = pd.Series(pd.date_range(start='2018-01-01', freq='D', periods=3))
    >>> s
    0   2018-01-01
    1   2018-01-02
    2   2018-01-03
    dtype: datetime64[ns]
    >>> s.dt.day_name()
    0       Monday
    1      Tuesday
    2    Wednesday
    dtype: object

    >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.day_name()
    Index(['Monday', 'Tuesday', 'Wednesday'], dtype='object')

    Using the ``locale`` parameter you can set a different locale language,
    for example: ``idx.day_name(locale='pt_BR.utf8')`` will return day
    names in Brazilian Portuguese language.

    >>> idx = pd.date_range(start='2018-01-01', freq='D', periods=3)
    >>> idx
    DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03'],
                  dtype='datetime64[ns]', freq='D')
    >>> idx.day_name(locale='pt_BR.utf8') # doctest: +SKIP
    Index(['Segunda', 'Terça', 'Quarta'], dtype='object')
    """
    # Names are looked up on wall-clock values, not on the UTC ones
    local_i8 = self._local_timestamps()
    names = fields.get_date_name_field(
        local_i8, "day_name", locale=locale, reso=self._creso
    )
    return self._maybe_mask_results(names, fill_value=None)
@property
def time(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of :class:`datetime.time` objects.

    The time part of the Timestamps.
    """
    # Box the wall-clock i8 values (local to self.tz rather than UTC) so
    # that the times reflect the array's own timezone.
    local_i8 = self._local_timestamps()
    return ints_to_pydatetime(local_i8, box="time", reso=self._creso)
@property
def timetz(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of :class:`datetime.time` objects with timezones.

    The time part of the Timestamps.
    """
    # The timezone is passed along with the raw i8 values so that the
    # boxed objects carry it.
    i8_values = self.asi8
    return ints_to_pydatetime(i8_values, self.tz, box="time", reso=self._creso)
@property
def date(self) -> npt.NDArray[np.object_]:
    """
    Returns numpy array of python :class:`datetime.date` objects.

    Namely, the date part of Timestamps without time and
    timezone information.
    """
    # Box the wall-clock i8 values (local to self.tz rather than UTC) so
    # that the dates reflect the array's own timezone.
    local_i8 = self._local_timestamps()
    return ints_to_pydatetime(local_i8, box="date", reso=self._creso)
def isocalendar(self) -> DataFrame:
    """
    Calculate year, week, and day according to the ISO 8601 standard.

    .. versionadded:: 1.1.0

    Returns
    -------
    DataFrame
        With columns year, week and day.

    See Also
    --------
    Timestamp.isocalendar : Function return a 3-tuple containing ISO year,
        week number, and weekday for the given Timestamp object.
    datetime.date.isocalendar : Return a named tuple object with
        three components: year, week and weekday.

    Examples
    --------
    >>> idx = pd.date_range(start='2019-12-29', freq='D', periods=4)
    >>> idx.isocalendar()
                year  week  day
    2019-12-29  2019    52    7
    2019-12-30  2020     1    1
    2019-12-31  2020     1    2
    2020-01-01  2020     1    3
    >>> idx.isocalendar().week
    2019-12-29    52
    2019-12-30     1
    2019-12-31     1
    2020-01-01     1
    Freq: D, Name: week, dtype: UInt32
    """
    from pandas import DataFrame

    local_i8 = self._local_timestamps()
    iso_records = fields.build_isocalendar_sarray(local_i8, reso=self._creso)
    frame = DataFrame(iso_records, columns=["year", "week", "day"], dtype="UInt32")
    if self._hasna:
        # Blank out the rows that correspond to missing timestamps
        frame.iloc[self._isnan] = None
    return frame
# Time-component accessors, ``year`` down to ``nanosecond``.  Each call
# passes a public name, a short field code and the user-facing docstring.
# NOTE(review): `_field_accessor` is defined outside this chunk; presumably
# it builds the class attribute from these three arguments - confirm there.
year = _field_accessor(
    "year",
    "Y",
    """
    The year of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="Y")
    ... )
    >>> datetime_series
    0   2000-12-31
    1   2001-12-31
    2   2002-12-31
    dtype: datetime64[ns]
    >>> datetime_series.dt.year
    0    2000
    1    2001
    2    2002
    dtype: int32
    """,
)
month = _field_accessor(
    "month",
    "M",
    """
    The month as January=1, December=12.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="M")
    ... )
    >>> datetime_series
    0   2000-01-31
    1   2000-02-29
    2   2000-03-31
    dtype: datetime64[ns]
    >>> datetime_series.dt.month
    0    1
    1    2
    2    3
    dtype: int32
    """,
)
day = _field_accessor(
    "day",
    "D",
    """
    The day of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="D")
    ... )
    >>> datetime_series
    0   2000-01-01
    1   2000-01-02
    2   2000-01-03
    dtype: datetime64[ns]
    >>> datetime_series.dt.day
    0    1
    1    2
    2    3
    dtype: int32
    """,
)
hour = _field_accessor(
    "hour",
    "h",
    """
    The hours of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="h")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 01:00:00
    2   2000-01-01 02:00:00
    dtype: datetime64[ns]
    >>> datetime_series.dt.hour
    0    0
    1    1
    2    2
    dtype: int32
    """,
)
minute = _field_accessor(
    "minute",
    "m",
    """
    The minutes of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="T")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:01:00
    2   2000-01-01 00:02:00
    dtype: datetime64[ns]
    >>> datetime_series.dt.minute
    0    0
    1    1
    2    2
    dtype: int32
    """,
)
second = _field_accessor(
    "second",
    "s",
    """
    The seconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="s")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00
    1   2000-01-01 00:00:01
    2   2000-01-01 00:00:02
    dtype: datetime64[ns]
    >>> datetime_series.dt.second
    0    0
    1    1
    2    2
    dtype: int32
    """,
)
microsecond = _field_accessor(
    "microsecond",
    "us",
    """
    The microseconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="us")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00.000000
    1   2000-01-01 00:00:00.000001
    2   2000-01-01 00:00:00.000002
    dtype: datetime64[ns]
    >>> datetime_series.dt.microsecond
    0       0
    1       1
    2       2
    dtype: int32
    """,
)
nanosecond = _field_accessor(
    "nanosecond",
    "ns",
    """
    The nanoseconds of the datetime.

    Examples
    --------
    >>> datetime_series = pd.Series(
    ...     pd.date_range("2000-01-01", periods=3, freq="ns")
    ... )
    >>> datetime_series
    0   2000-01-01 00:00:00.000000000
    1   2000-01-01 00:00:00.000000001
    2   2000-01-01 00:00:00.000000002
    dtype: datetime64[ns]
    >>> datetime_series.dt.nanosecond
    0       0
    1       1
    2       2
    dtype: int32
    """,
)
# Docstring shared by ``day_of_week`` and its two aliases below.
_dayofweek_doc = """
The day of the week with Monday=0, Sunday=6.

Return the day of the week. It is assumed the week starts on
Monday, which is denoted by 0 and ends on Sunday which is denoted
by 6. This method is available on both Series with datetime
values (using the `dt` accessor) or DatetimeIndex.

Returns
-------
Series or Index
    Containing integers indicating the day number.

See Also
--------
Series.dt.dayofweek : Alias.
Series.dt.weekday : Alias.
Series.dt.day_name : Returns the name of the day of the week.

Examples
--------
>>> s = pd.date_range('2016-12-31', '2017-01-08', freq='D').to_series()
>>> s.dt.dayofweek
2016-12-31    5
2017-01-01    6
2017-01-02    0
2017-01-03    1
2017-01-04    2
2017-01-05    3
2017-01-06    4
2017-01-07    5
2017-01-08    6
Freq: D, dtype: int32
"""
day_of_week = _field_accessor("day_of_week", "dow", _dayofweek_doc)
# Aliases: both names are bound to the very same accessor object.
dayofweek = day_of_week
weekday = day_of_week

# NOTE(review): the name passed here is "dayofyear" although the attribute
# is ``day_of_year`` (compare "day_of_week" above) - confirm intentional.
day_of_year = _field_accessor(
    "dayofyear",
    "doy",
    """
    The ordinal day of the year.
    """,
)
# Alias bound to the same accessor object.
dayofyear = day_of_year
quarter = _field_accessor(
    "quarter",
    "q",
    """
    The quarter of the date.
    """,
)
days_in_month = _field_accessor(
    "days_in_month",
    "dim",
    """
    The number of days in the month.
    """,
)
# Alias bound to the same accessor object.
daysinmonth = days_in_month
# Docstring template shared by ``is_month_start`` and ``is_month_end``;
# ``{first_or_last}`` is filled in with ``str.format`` at each call below.
_is_month_doc = """
    Indicates whether the date is the {first_or_last} day of the month.

    Returns
    -------
    Series or array
        For Series, returns a Series with boolean values.
        For DatetimeIndex, returns a boolean array.

    See Also
    --------
    is_month_start : Return a boolean indicating whether the date
        is the first day of the month.
    is_month_end : Return a boolean indicating whether the date
        is the last day of the month.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> s = pd.Series(pd.date_range("2018-02-27", periods=3))
    >>> s
    0   2018-02-27
    1   2018-02-28
    2   2018-03-01
    dtype: datetime64[ns]
    >>> s.dt.is_month_start
    0    False
    1    False
    2    True
    dtype: bool
    >>> s.dt.is_month_end
    0    False
    1    True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2018-02-27", periods=3)
    >>> idx.is_month_start
    array([False, False, True])
    >>> idx.is_month_end
    array([False, True, False])
"""
# For these two the public name and the field code are the same string.
is_month_start = _field_accessor(
    "is_month_start", "is_month_start", _is_month_doc.format(first_or_last="first")
)

is_month_end = _field_accessor(
    "is_month_end", "is_month_end", _is_month_doc.format(first_or_last="last")
)
# Quarter-boundary indicators; the public name and the field code passed to
# `_field_accessor` are the same string for both.
is_quarter_start = _field_accessor(
    "is_quarter_start",
    "is_quarter_start",
    """
    Indicator for whether the date is the first day of a quarter.

    Returns
    -------
    is_quarter_start : Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_end : Similar property for indicating the quarter end.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
    ...                   periods=4)})
    >>> df.assign(quarter=df.dates.dt.quarter,
    ...           is_quarter_start=df.dates.dt.is_quarter_start)
           dates  quarter  is_quarter_start
    0 2017-03-30        1             False
    1 2017-03-31        1             False
    2 2017-04-01        2              True
    3 2017-04-02        2             False

    >>> idx = pd.date_range('2017-03-30', periods=4)
    >>> idx
    DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_quarter_start
    array([False, False,  True, False])
    """,
)
is_quarter_end = _field_accessor(
    "is_quarter_end",
    "is_quarter_end",
    """
    Indicator for whether the date is the last day of a quarter.

    Returns
    -------
    is_quarter_end : Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    quarter : Return the quarter of the date.
    is_quarter_start : Similar property indicating the quarter start.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> df = pd.DataFrame({'dates': pd.date_range("2017-03-30",
    ...                    periods=4)})
    >>> df.assign(quarter=df.dates.dt.quarter,
    ...           is_quarter_end=df.dates.dt.is_quarter_end)
           dates  quarter    is_quarter_end
    0 2017-03-30        1             False
    1 2017-03-31        1              True
    2 2017-04-01        2             False
    3 2017-04-02        2             False

    >>> idx = pd.date_range('2017-03-30', periods=4)
    >>> idx
    DatetimeIndex(['2017-03-30', '2017-03-31', '2017-04-01', '2017-04-02'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_quarter_end
    array([False,  True, False, False])
    """,
)
# Year-boundary indicators; the public name and the field code passed to
# `_field_accessor` are the same string for both.
is_year_start = _field_accessor(
    "is_year_start",
    "is_year_start",
    """
    Indicate whether the date is the first day of a year.

    Returns
    -------
    Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    is_year_end : Similar property indicating the last day of the year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
    >>> dates
    0   2017-12-30
    1   2017-12-31
    2   2018-01-01
    dtype: datetime64[ns]

    >>> dates.dt.is_year_start
    0    False
    1    False
    2    True
    dtype: bool

    >>> idx = pd.date_range("2017-12-30", periods=3)
    >>> idx
    DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_year_start
    array([False, False,  True])
    """,
)
is_year_end = _field_accessor(
    "is_year_end",
    "is_year_end",
    """
    Indicate whether the date is the last day of the year.

    Returns
    -------
    Series or DatetimeIndex
        The same type as the original data with boolean values. Series will
        have the same name and index. DatetimeIndex will have the same
        name.

    See Also
    --------
    is_year_start : Similar property indicating the start of the year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> dates = pd.Series(pd.date_range("2017-12-30", periods=3))
    >>> dates
    0   2017-12-30
    1   2017-12-31
    2   2018-01-01
    dtype: datetime64[ns]

    >>> dates.dt.is_year_end
    0    False
    1     True
    2    False
    dtype: bool

    >>> idx = pd.date_range("2017-12-30", periods=3)
    >>> idx
    DatetimeIndex(['2017-12-30', '2017-12-31', '2018-01-01'],
                  dtype='datetime64[ns]', freq='D')

    >>> idx.is_year_end
    array([False,  True, False])
    """,
)
# Leap-year indicator; the public name and the field code passed to
# `_field_accessor` are the same string.
is_leap_year = _field_accessor(
    "is_leap_year",
    "is_leap_year",
    """
    Boolean indicator if the date belongs to a leap year.

    A leap year is a year, which has 366 days (instead of 365) including
    29th of February as an intercalary day.
    Leap years are years which are multiples of four with the exception
    of years divisible by 100 but not by 400.

    Returns
    -------
    Series or ndarray
         Booleans indicating if dates belong to a leap year.

    Examples
    --------
    This method is available on Series with datetime values under
    the ``.dt`` accessor, and directly on DatetimeIndex.

    >>> idx = pd.date_range("2012-01-01", "2015-01-01", freq="Y")
    >>> idx
    DatetimeIndex(['2012-12-31', '2013-12-31', '2014-12-31'],
                  dtype='datetime64[ns]', freq='A-DEC')
    >>> idx.is_leap_year
    array([ True, False, False])

    >>> dates_series = pd.Series(idx)
    >>> dates_series
    0   2012-12-31
    1   2013-12-31
    2   2014-12-31
    dtype: datetime64[ns]
    >>> dates_series.dt.is_leap_year
    0     True
    1    False
    2    False
    dtype: bool
    """,
)
def to_julian_date(self) -> npt.NDArray[np.float64]:
    """
    Convert Datetime Array to float64 ndarray of Julian Dates.

    0 Julian date is noon January 1, 4713 BC.
    https://en.wikipedia.org/wiki/Julian_day
    """
    # http://mysite.verizon.net/aesir_research/date/jdalg2.htm
    years = np.asarray(self.year)
    months = np.asarray(self.month)
    days = np.asarray(self.day)

    # January and February are counted as months 13 and 14 of the
    # previous year.
    in_jan_feb = months < 3
    years[in_jan_feb] -= 1
    months[in_jan_feb] += 12

    # Day count up to midnight of each date
    day_number = (
        days
        + np.fix((153 * months - 457) / 5)
        + 365 * years
        + np.floor(years / 4)
        - np.floor(years / 100)
        + np.floor(years / 400)
        + 1_721_118.5
    )
    # Time of day expressed in (fractional) hours
    hours_of_day = (
        self.hour
        + self.minute / 60
        + self.second / 3600
        + self.microsecond / 3600 / 10**6
        + self.nanosecond / 3600 / 10**9
    )
    return day_number + hours_of_day / 24
- # -----------------------------------------------------------------
- # Reductions
def std(
    self,
    axis=None,
    dtype=None,
    out=None,
    ddof: int = 1,
    keepdims: bool = False,
    skipna: bool = True,
):
    """
    Return sample standard deviation over requested axis.

    Normalized by N-1 by default. This can be changed using the ddof argument

    Parameters
    ----------
    axis : int optional, default None
        Axis for the function to be applied on.
        For `Series` this parameter is unused and defaults to `None`.
    dtype : optional
        Accepted in the signature but not forwarded to the underlying
        computation.
    out : optional
        Forwarded unchanged to ``TimedeltaArray.std``.
    ddof : int, default 1
        Degrees of Freedom. The divisor used in calculations is N - ddof,
        where N represents the number of elements.
    keepdims : bool, default False
        Forwarded unchanged to ``TimedeltaArray.std``.
    skipna : bool, default True
        Exclude NA/null values. If an entire row/column is NA, the result will be
        NA.

    Returns
    -------
    Timedelta
    """
    # std is translation-invariant, so self.std equals
    # (self - Timestamp(0)).std.  Reinterpreting self._ndarray as
    # timedelta64 gives that difference through a view, without a copy.
    from pandas.core.arrays import TimedeltaArray

    # td64 dtype with the same resolution as our dt64 dtype
    td64_name = self._ndarray.dtype.name.replace("datetime64", "timedelta64")
    td64_dtype = np.dtype(td64_name)

    as_timedeltas = TimedeltaArray._simple_new(
        self._ndarray.view(td64_dtype), dtype=td64_dtype
    )
    return as_timedeltas.std(
        axis=axis, out=out, ddof=ddof, keepdims=keepdims, skipna=skipna
    )
- # -------------------------------------------------------------------
- # Constructor Helpers
- def _sequence_to_dt64ns(
- data,
- *,
- copy: bool = False,
- tz: tzinfo | None = None,
- dayfirst: bool = False,
- yearfirst: bool = False,
- ambiguous: TimeAmbiguous = "raise",
- out_unit: str | None = None,
- ):
- """
- Parameters
- ----------
- data : list-like
- copy : bool, default False
- tz : tzinfo or None, default None
- dayfirst : bool, default False
- yearfirst : bool, default False
- ambiguous : str, bool, or arraylike, default 'raise'
- See pandas._libs.tslibs.tzconversion.tz_localize_to_utc.
- out_unit : str or None, default None
- Desired output resolution.
- Returns
- -------
- result : numpy.ndarray
- The sequence converted to a numpy array with dtype ``datetime64[ns]``.
- tz : tzinfo or None
- Either the user-provided tzinfo or one inferred from the data.
- inferred_freq : Tick or None
- The inferred frequency of the sequence.
- Raises
- ------
- TypeError : PeriodDType data is passed
- """
- inferred_freq = None
- data, copy = dtl.ensure_arraylike_for_datetimelike(
- data, copy, cls_name="DatetimeArray"
- )
- if isinstance(data, DatetimeArray):
- inferred_freq = data.freq
- # By this point we are assured to have either a numpy array or Index
- data, copy = maybe_convert_dtype(data, copy, tz=tz)
- data_dtype = getattr(data, "dtype", None)
- out_dtype = DT64NS_DTYPE
- if out_unit is not None:
- out_dtype = np.dtype(f"M8[{out_unit}]")
- if (
- is_object_dtype(data_dtype)
- or is_string_dtype(data_dtype)
- or is_sparse(data_dtype)
- ):
- # TODO: We do not have tests specific to string-dtypes,
- # also complex or categorical or other extension
- copy = False
- if lib.infer_dtype(data, skipna=False) == "integer":
- data = data.astype(np.int64)
- elif tz is not None and ambiguous == "raise":
- # TODO: yearfirst/dayfirst/etc?
- obj_data = np.asarray(data, dtype=object)
- i8data = tslib.array_to_datetime_with_tz(obj_data, tz)
- return i8data.view(DT64NS_DTYPE), tz, None
- else:
- # data comes back here as either i8 to denote UTC timestamps
- # or M8[ns] to denote wall times
- data, inferred_tz = objects_to_datetime64ns(
- data,
- dayfirst=dayfirst,
- yearfirst=yearfirst,
- allow_object=False,
- )
- if tz and inferred_tz:
- # two timezones: convert to intended from base UTC repr
- assert data.dtype == "i8"
- # GH#42505
- # by convention, these are _already_ UTC, e.g
- return data.view(DT64NS_DTYPE), tz, None
- elif inferred_tz:
- tz = inferred_tz
- data_dtype = data.dtype
- # `data` may have originally been a Categorical[datetime64[ns, tz]],
- # so we need to handle these types.
- if is_datetime64tz_dtype(data_dtype):
- # DatetimeArray -> ndarray
- tz = _maybe_infer_tz(tz, data.tz)
- result = data._ndarray
- elif is_datetime64_dtype(data_dtype):
- # tz-naive DatetimeArray or ndarray[datetime64]
- data = getattr(data, "_ndarray", data)
- new_dtype = data.dtype
- data_unit = get_unit_from_dtype(new_dtype)
- if not is_supported_unit(data_unit):
- # Cast to the nearest supported unit, generally "s"
- new_reso = get_supported_reso(data_unit)
- new_unit = npy_unit_to_abbrev(new_reso)
- new_dtype = np.dtype(f"M8[{new_unit}]")
- data = astype_overflowsafe(data, dtype=new_dtype, copy=False)
- data_unit = get_unit_from_dtype(new_dtype)
- copy = False
- if data.dtype.byteorder == ">":
- # TODO: better way to handle this? non-copying alternative?
- # without this, test_constructor_datetime64_bigendian fails
- data = data.astype(data.dtype.newbyteorder("<"))
- new_dtype = data.dtype
- copy = False
- if tz is not None:
- # Convert tz-naive to UTC
- # TODO: if tz is UTC, are there situations where we *don't* want a
- # copy? tz_localize_to_utc always makes one.
- shape = data.shape
- if data.ndim > 1:
- data = data.ravel()
- data = tzconversion.tz_localize_to_utc(
- data.view("i8"), tz, ambiguous=ambiguous, creso=data_unit
- )
- data = data.view(new_dtype)
- data = data.reshape(shape)
- assert data.dtype == new_dtype, data.dtype
- result = data
- else:
- # must be integer dtype otherwise
- # assume this data are epoch timestamps
- if data.dtype != INT64_DTYPE:
- data = data.astype(np.int64, copy=False)
- result = data.view(out_dtype)
- if copy:
- result = result.copy()
- assert isinstance(result, np.ndarray), type(result)
- assert result.dtype.kind == "M"
- assert result.dtype != "M8"
- assert is_supported_unit(get_unit_from_dtype(result.dtype))
- return result, tz, inferred_freq
def objects_to_datetime64ns(
    data: np.ndarray,
    dayfirst,
    yearfirst,
    utc: bool = False,
    errors: DateTimeErrorChoices = "raise",
    allow_object: bool = False,
):
    """
    Convert data to array of timestamps.

    Parameters
    ----------
    data : np.ndarray[object]
        Anything ``np.asarray`` can turn into an object-dtype array is
        accepted (e.g. a str-dtype ndarray).
    dayfirst : bool
    yearfirst : bool
    utc : bool, default False
        Whether to convert/localize timestamps to UTC.
    errors : {'raise', 'ignore', 'coerce'}
    allow_object : bool
        Whether to return an object-dtype ndarray instead of raising if the
        data contains more than one timezone.

    Returns
    -------
    result : ndarray
        np.int64 dtype if returned values represent UTC timestamps
        np.datetime64[ns] if returned values represent wall times
        object if mixed timezones
    inferred_tz : tzinfo or None

    Raises
    ------
    ValueError : if data cannot be converted to datetimes
    TypeError : if the parsed result is object-dtype and ``allow_object``
        is False
    """
    assert errors in ["raise", "ignore", "coerce"]

    # if str-dtype, convert
    # np.asarray copies only when it has to.  np.array(..., copy=False) must
    # not be used here: under NumPy >= 2.0 it raises ValueError whenever the
    # conversion to object dtype needs a copy.
    data = np.asarray(data, dtype=np.object_)

    result, tz_parsed = tslib.array_to_datetime(
        data,
        errors=errors,
        utc=utc,
        dayfirst=dayfirst,
        yearfirst=yearfirst,
    )

    if tz_parsed is not None:
        # We can take a shortcut since the datetime64 numpy array
        #  is in UTC
        # Return i8 values to denote unix timestamps
        return result.view("i8"), tz_parsed
    elif is_datetime64_dtype(result):
        # returning M8[ns] denotes wall-times; since tz is None
        #  the distinction is a thin one
        return result, tz_parsed
    elif is_object_dtype(result):
        # GH#23675 when called via `pd.to_datetime`, returning an object-dtype
        #  array is allowed.  When called via `pd.DatetimeIndex`, we can
        #  only accept datetime64 dtype, so raise TypeError if object-dtype
        #  is returned, as that indicates the values can be recognized as
        #  datetimes but they have conflicting timezones/awareness
        if allow_object:
            return result, tz_parsed
        raise TypeError(result)
    else:  # pragma: no cover
        # GH#23675 this TypeError should never be hit, whereas the TypeError
        #  in the object-dtype branch above is reachable.
        raise TypeError(result)
def maybe_convert_dtype(data, copy: bool, tz: tzinfo | None = None):
    """
    Normalise ``data`` according to its dtype before datetime conversion,
    rejecting dtypes that cannot be interpreted as datetimes.

    Parameters
    ----------
    data : np.ndarray or pd.Index
    copy : bool
    tz : tzinfo or None, default None

    Returns
    -------
    data : np.ndarray or pd.Index
    copy : bool
        Reset to False whenever ``data`` was replaced by a newly built array.

    Raises
    ------
    TypeError : timedelta64, bool or PeriodDtype data is passed
    """
    if not hasattr(data, "dtype"):
        # e.g. collections.deque
        return data, copy

    dtype = data.dtype

    if is_float_dtype(dtype):
        # pre-2.0 we treated these as wall-times, inconsistent with ints
        # GH#23675, GH#45573 deprecated to treat symmetrically with integer dtypes.
        # Note: data.astype(np.int64) fails ARM tests, see
        # https://github.com/pandas-dev/pandas/issues/49468.
        as_i8 = data.astype(DT64NS_DTYPE).view("i8")
        return as_i8, False

    if is_timedelta64_dtype(dtype) or is_bool_dtype(dtype):
        # GH#29794 enforcing deprecation introduced in GH#23539
        raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")

    if is_period_dtype(dtype):
        # Note: without explicitly raising here, PeriodIndex
        #  test_setops.test_join_does_not_recur fails
        raise TypeError(
            "Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
        )

    if is_extension_array_dtype(dtype) and not is_datetime64tz_dtype(dtype):
        # TODO: We have no tests for these
        as_object = np.array(data, dtype=np.object_)
        return as_object, False

    return data, copy
- # -------------------------------------------------------------------
- # Validation and Inference
def _maybe_infer_tz(tz: tzinfo | None, inferred_tz: tzinfo | None) -> tzinfo | None:
    """
    Reconcile a user-provided timezone with one inferred from the data.

    Parameters
    ----------
    tz : tzinfo or None
    inferred_tz : tzinfo or None

    Returns
    -------
    tz : tzinfo or None
        ``inferred_tz`` when no ``tz`` was given, otherwise ``tz``.

    Raises
    ------
    TypeError : if both timezones are present but do not match
    """
    if tz is None:
        # Nothing requested by the user: whatever the data carries wins.
        return inferred_tz

    if inferred_tz is not None and not timezones.tz_compare(tz, inferred_tz):
        raise TypeError(
            f"data is already tz-aware {inferred_tz}, unable to "
            f"set specified tz: {tz}"
        )
    return tz
def _validate_dt64_dtype(dtype):
    """
    Check that a dtype, if passed, represents either a numpy datetime64
    dtype with a supported unit or a pandas DatetimeTZDtype.

    Parameters
    ----------
    dtype : object

    Returns
    -------
    dtype : None, numpy.dtype, or DatetimeTZDtype
        For a tz-aware dtype, a DatetimeTZDtype with the same unit and a
        standardized timezone.

    Raises
    ------
    ValueError : invalid dtype

    Notes
    -----
    Unlike _validate_tz_from_dtype, this does _not_ allow non-existent
    tz errors to go through
    """
    if dtype is None:
        return dtype

    dtype = pandas_dtype(dtype)
    if is_dtype_equal(dtype, np.dtype("M8")):
        # no precision, disallowed GH#24806
        msg = (
            "Passing in 'datetime64' dtype with no precision is not allowed. "
            "Please pass in 'datetime64[ns]' instead."
        )
        raise ValueError(msg)

    if (
        isinstance(dtype, np.dtype)
        and (dtype.kind != "M" or not is_supported_unit(get_unit_from_dtype(dtype)))
    ) or not isinstance(dtype, (np.dtype, DatetimeTZDtype)):
        raise ValueError(
            f"Unexpected value for 'dtype': '{dtype}'. "
            "Must be 'datetime64[s]', 'datetime64[ms]', 'datetime64[us]', "
            "'datetime64[ns]' or DatetimeTZDtype'."
        )

    if getattr(dtype, "tz", None):
        # https://github.com/pandas-dev/pandas/issues/18595
        # Ensure that we have a standard timezone for pytz objects.
        # Without this, things like adding an array of timedeltas and
        # a tz-aware Timestamp (with a tz specific to its datetime) will
        # be incorrect(ish?) for the array as a whole
        dtype = cast(DatetimeTZDtype, dtype)
        # Carry the unit over explicitly: DatetimeTZDtype defaults to "ns",
        # which would silently change the resolution of a non-nano dtype.
        dtype = DatetimeTZDtype(
            unit=dtype.unit, tz=timezones.tz_standardize(dtype.tz)
        )

    return dtype
def _validate_tz_from_dtype(
    dtype, tz: tzinfo | None, explicit_tz_none: bool = False
) -> tzinfo | None:
    """
    Work out the timezone implied by ``dtype`` and ``tz`` together, checking
    that the two do not contradict each other.

    Parameters
    ----------
    dtype : dtype, str
    tz : None, tzinfo
    explicit_tz_none : bool, default False
        Whether tz=None was passed explicitly, as opposed to lib.no_default.

    Returns
    -------
    tz : consensus tzinfo

    Raises
    ------
    ValueError : on tzinfo mismatch
    """
    if dtype is None:
        return tz

    if isinstance(dtype, str):
        try:
            dtype = DatetimeTZDtype.construct_from_string(dtype)
        except TypeError:
            # Things like `datetime64[ns]`, which is OK for the
            # constructors, but also nonsense, which should be validated
            # but not by us. We *do* allow non-existent tz errors to
            # go through
            pass

    dtype_tz = getattr(dtype, "tz", None)
    if dtype_tz is not None:
        if tz is not None and not timezones.tz_compare(tz, dtype_tz):
            raise ValueError("cannot supply both a tz and a dtype with a tz")
        if explicit_tz_none:
            raise ValueError("Cannot pass both a timezone-aware dtype and tz=None")
        tz = dtype_tz

    # We also need to check for the case where the user passed a
    #  tz-naive dtype (i.e. datetime64[ns])
    if (
        tz is not None
        and is_datetime64_dtype(dtype)
        and not timezones.tz_compare(tz, dtype_tz)
    ):
        raise ValueError(
            "cannot supply both a tz and a "
            "timezone-naive dtype (i.e. datetime64[ns])"
        )

    return tz
def _infer_tz_from_endpoints(
    start: Timestamp, end: Timestamp, tz: tzinfo | None
) -> tzinfo | None:
    """
    Determine the timezone to use from `tz` and the `start`/`end` endpoints.
    When `tz` is not given, fall back to the timezone carried by the
    endpoints. When more than one of these inputs provides a timezone,
    they must all agree.

    Parameters
    ----------
    start : Timestamp
    end : Timestamp
    tz : tzinfo or None

    Returns
    -------
    tz : tzinfo or None

    Raises
    ------
    TypeError : if start and end timezones do not agree
    AssertionError : if the endpoint timezone differs from the passed `tz`
    """
    try:
        endpoint_tz = timezones.infer_tzinfo(start, end)
    except AssertionError as err:
        # infer_tzinfo raises AssertionError if passed mismatched timezones
        raise TypeError(
            "Start and end cannot both be tz-aware with different timezones"
        ) from err

    endpoint_tz = timezones.maybe_get_tz(endpoint_tz)
    tz = timezones.maybe_get_tz(tz)

    if endpoint_tz is None:
        # Endpoints are naive: only the explicit tz (possibly None) applies.
        return tz
    if tz is None:
        return endpoint_tz
    if not timezones.tz_compare(endpoint_tz, tz):
        raise AssertionError("Inferred time zone not equal to passed time zone")
    return tz
def _maybe_normalize_endpoints(
    start: Timestamp | None, end: Timestamp | None, normalize: bool
):
    """
    Return ``(start, end)``, calling ``.normalize()`` on each endpoint that
    is not None when ``normalize`` is truthy; otherwise return them as given.
    """
    if not normalize:
        return start, end

    normalized_start = start if start is None else start.normalize()
    normalized_end = end if end is None else end.normalize()
    return normalized_start, normalized_end
def _maybe_localize_point(ts, is_none, is_not_none, freq, tz, ambiguous, nonexistent):
    """
    Localize a start or end Timestamp to the timezone of the corresponding
    start or end Timestamp

    Parameters
    ----------
    ts : start or end Timestamp to potentially localize
    is_none : argument that should be None
    is_not_none : argument that should not be None
    freq : Tick, DateOffset, or None
    tz : str, timezone object or None
    ambiguous: str, localization behavior for ambiguous times
    nonexistent: str, localization behavior for nonexistent times

    Returns
    -------
    ts : Timestamp
        ``ts`` itself when the None / not-None precondition does not hold,
        otherwise the result of ``ts.tz_localize``.
    """
    if is_none is not None or is_not_none is None:
        return ts

    # Note: We can't ambiguous='infer' a singular ambiguous time; however,
    # we have historically defaulted ambiguous=False
    ambiguous = ambiguous if ambiguous != "infer" else False

    # Make sure start and end are timezone localized if:
    # 1) freq = a Timedelta-like frequency (Tick)
    # 2) freq = None i.e. generating a linspaced range
    # For any other freq, tz_localize is called with tz=None.
    target_tz = tz if (isinstance(freq, Tick) or freq is None) else None

    return ts.tz_localize(
        ambiguous=ambiguous, nonexistent=nonexistent, tz=target_tz
    )
def _generate_range(
    start: Timestamp | None,
    end: Timestamp | None,
    periods: int | None,
    offset: BaseOffset,
    *,
    unit: str,
):
    """
    Generates a sequence of dates corresponding to the specified time
    offset. Similar to dateutil.rrule except uses pandas DateOffset
    objects to represent time increments.

    Parameters
    ----------
    start : Timestamp or None
    end : Timestamp or None
    periods : int or None
    offset : DateOffset
    unit : str
        Resolution that the endpoints and every generated date are cast to
        via ``as_unit``.

    Notes
    -----
    * This method is faster for generating weekdays than dateutil.rrule
    * At least two of (start, end, periods) must be specified.
    * If both start and end are specified, the returned dates will
      satisfy start <= date <= end (end <= date <= start when the offset
      is negative).

    Returns
    -------
    dates : generator object

    Raises
    ------
    ValueError : if applying the offset does not move the date in the
        direction of the offset
    """
    offset = to_offset(offset)

    # Timestamp(None) is NaT, so a missing endpoint is mapped back to None.
    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    start = Timestamp(start)  # type: ignore[arg-type]
    if start is not NaT:
        start = start.as_unit(unit)
    else:
        start = None

    # Argument 1 to "Timestamp" has incompatible type "Optional[Timestamp]";
    # expected "Union[integer[Any], float, str, date, datetime64]"
    end = Timestamp(end)  # type: ignore[arg-type]
    if end is not NaT:
        end = end.as_unit(unit)
    else:
        end = None

    if start and not offset.is_on_offset(start):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        # GH#56147 snap in the direction of travel so that the first date
        # never lands outside the requested bounds: forward for a
        # non-negative offset, backward for a negative one.
        if offset.n >= 0:
            start = offset.rollforward(start)  # type: ignore[assignment]
        else:
            start = offset.rollback(start)  # type: ignore[assignment]
    elif end and not offset.is_on_offset(end):
        # Incompatible types in assignment (expression has type "datetime",
        # variable has type "Optional[Timestamp]")
        end = offset.rollback(end)  # type: ignore[assignment]

    # Unsupported operand types for < ("Timestamp" and "None")
    if periods is None and end < start and offset.n >= 0:  # type: ignore[operator]
        # Forward offset with end before start: force an empty range.
        end = None
        periods = 0

    if end is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        end = start + (periods - 1) * offset  # type: ignore[operator]

    if start is None:
        # error: No overload variant of "__radd__" of "BaseOffset" matches
        # argument type "None"
        start = end - (periods - 1) * offset  # type: ignore[operator]

    start = cast(Timestamp, start)
    end = cast(Timestamp, end)

    cur = start
    if offset.n >= 0:
        while cur <= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date <= cur:
                raise ValueError(f"Offset {offset} did not increment date")
            cur = next_date
    else:
        while cur >= end:
            yield cur

            if cur == end:
                # GH#24252 avoid overflows by not performing the addition
                # in offset.apply unless we have to
                break

            # faster than cur + offset
            next_date = offset._apply(cur).as_unit(unit)
            if next_date >= cur:
                raise ValueError(f"Offset {offset} did not decrement date")
            cur = next_date
|