12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171 |
- import collections
- import warnings
- cimport cython
- from cpython.object cimport (
- Py_EQ,
- Py_GE,
- Py_GT,
- Py_LE,
- Py_LT,
- Py_NE,
- PyObject,
- PyObject_RichCompare,
- )
- import numpy as np
- cimport numpy as cnp
- from numpy cimport (
- int64_t,
- ndarray,
- )
- cnp.import_array()
- from cpython.datetime cimport (
- PyDateTime_Check,
- PyDelta_Check,
- import_datetime,
- timedelta,
- )
- import_datetime()
- cimport pandas._libs.tslibs.util as util
- from pandas._libs.tslibs.base cimport ABCTimestamp
- from pandas._libs.tslibs.conversion cimport (
- cast_from_unit,
- precision_from_unit,
- )
- from pandas._libs.tslibs.dtypes cimport (
- get_supported_reso,
- npy_unit_to_abbrev,
- )
- from pandas._libs.tslibs.nattype cimport (
- NPY_NAT,
- c_NaT as NaT,
- c_nat_strings as nat_strings,
- checknull_with_nat,
- )
- from pandas._libs.tslibs.np_datetime cimport (
- NPY_DATETIMEUNIT,
- NPY_FR_ns,
- cmp_dtstructs,
- cmp_scalar,
- convert_reso,
- get_datetime64_unit,
- get_timedelta64_value,
- get_unit_from_dtype,
- npy_datetimestruct,
- pandas_datetime_to_datetimestruct,
- pandas_timedelta_to_timedeltastruct,
- pandas_timedeltastruct,
- )
- from pandas._libs.tslibs.np_datetime import (
- OutOfBoundsDatetime,
- OutOfBoundsTimedelta,
- )
- from pandas._libs.tslibs.offsets cimport is_tick_object
- from pandas._libs.tslibs.util cimport (
- is_array,
- is_datetime64_object,
- is_float_object,
- is_integer_object,
- is_timedelta64_object,
- )
- from pandas._libs.tslibs.fields import (
- RoundTo,
- round_nsint64,
- )
- # ----------------------------------------------------------------------
- # Constants
# Named tuple holding the broken-down fields of a timedelta, ordered from
# the coarsest unit (days) to the finest (nanoseconds).
Components = collections.namedtuple(
    "Components",
    "days hours minutes seconds milliseconds microseconds nanoseconds",
)
# This should be kept consistent with UnitChoices in pandas/_libs/tslibs/timedeltas.pyi
# Each row pairs a canonical unit code with every alias that maps onto it.
# Rows and aliases are listed in the order in which the keys must appear in
# the resulting dict.
cdef dict timedelta_abbrevs = {
    alias: canonical
    for canonical, aliases in (
        ("Y", ("Y", "y")),
        ("M", ("M",)),
        ("W", ("W", "w")),
        ("D", ("D", "d", "days", "day")),
        ("h", ("hours", "hour", "hr", "h")),
        ("m", ("m", "minute", "min", "minutes", "t")),
        ("s", ("s", "seconds", "sec", "second")),
        ("ms", ("ms", "milliseconds", "millisecond", "milli", "millis", "l")),
        ("us", ("us", "microseconds", "microsecond", "µs", "micro", "micros", "u")),
        ("ns", ("ns", "nanoseconds", "nano", "nanos", "nanosecond", "n")),
    )
    for alias in aliases
}

# Unique module-level sentinel object.
# NOTE(review): presumably marks "argument not supplied" as distinct from
# None; its users are outside this part of the file -- confirm.
_no_input = object()
- # ----------------------------------------------------------------------
- # API
- @cython.boundscheck(False)
- @cython.wraparound(False)
- def ints_to_pytimedelta(ndarray m8values, box=False):
- """
- Convert a timedelta64 ndarray to an object ndarray of datetime.timedelta
- objects, or of Timedelta objects if box == True.
- Entries whose integer value equals NPY_NAT become NaT in either mode.
- Parameters
- ----------
- m8values : ndarray[timedelta64]
- Its resolution is read from ``m8values.dtype``.
- box : bool, default False
- If True, build Timedelta objects at the array's own resolution;
- otherwise build datetime.timedelta objects.
- Returns
- -------
- result : ndarray[object]
- array of Timedelta or timedeltas objects, with the shape of ``m8values``
- Raises
- ------
- NotImplementedError
- If box is False and the resolution is not one of
- ns, us, ms, s, m, h, D or W.
- """
- cdef:
- NPY_DATETIMEUNIT reso = get_unit_from_dtype(m8values.dtype)
- Py_ssize_t i, n = m8values.size
- int64_t value
- object res_val
- # Note that `result` (and thus `result_flat`) is C-order and
- # `it` iterates C-order as well, so the iteration matches
- # See discussion at
- # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
- ndarray result = cnp.PyArray_EMPTY(
- m8values.ndim, m8values.shape, cnp.NPY_OBJECT, 0
- )
- object[::1] res_flat = result.ravel() # should NOT be a copy
- ndarray arr = m8values.view("i8")
- cnp.flatiter it = cnp.PyArray_IterNew(arr)
- for i in range(n):
- # Analogous to: value = arr[i]
- value = (<int64_t*>cnp.PyArray_ITER_DATA(it))[0]
- if value == NPY_NAT:
- res_val = <object>NaT
- else:
- if box:
- res_val = _timedelta_from_value_and_reso(Timedelta, value, reso=reso)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_ns:
- # true division: datetime.timedelta receives a (possibly
- # fractional) float number of microseconds
- res_val = timedelta(microseconds=int(value) / 1000)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
- res_val = timedelta(microseconds=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
- res_val = timedelta(milliseconds=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_s:
- res_val = timedelta(seconds=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_m:
- res_val = timedelta(minutes=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_h:
- res_val = timedelta(hours=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_D:
- res_val = timedelta(days=value)
- elif reso == NPY_DATETIMEUNIT.NPY_FR_W:
- res_val = timedelta(weeks=value)
- else:
- # Month, Year, NPY_FR_GENERIC, pico, femto, atto
- raise NotImplementedError(reso)
- # Note: we can index result directly instead of using PyArray_MultiIter_DATA
- # like we do for the other functions because result is known C-contiguous
- # and is the first argument to PyArray_MultiIterNew2. The usual pattern
- # does not seem to work with object dtype.
- # See discussion at
- # github.com/pandas-dev/pandas/pull/46886#discussion_r860261305
- res_flat[i] = res_val
- # advance the input iterator in step with the flat output index
- cnp.PyArray_ITER_NEXT(it)
- return result
- # ----------------------------------------------------------------------
cpdef int64_t delta_to_nanoseconds(
    delta,
    NPY_DATETIMEUNIT reso=NPY_FR_ns,
    bint round_ok=True,
) except? -1:
    """
    Express a timedelta-like scalar as an integer count of ``reso`` units.

    Parameters
    ----------
    delta : Tick, Timedelta, np.timedelta64 or datetime.timedelta
    reso : NPY_DATETIMEUNIT, default NPY_FR_ns
        Target resolution of the returned integer.
    round_ok : bool, default True
        Forwarded unchanged to ``convert_reso``.

    Returns
    -------
    int64_t

    Raises
    ------
    ValueError
        If ``delta`` is a timedelta64 with Y or M unit.
    TypeError
        If ``delta`` is none of the supported types.
    OutOfBoundsTimedelta
        If the value cannot be represented without overflow.
    """
    # Note: this will raise on timedelta64 with Y or M unit
    cdef:
        NPY_DATETIMEUNIT src_reso
        int64_t src_value

    # Step 1: read the integer payload together with the unit it is stored in.
    if is_tick_object(delta):
        src_value = delta.n
        src_reso = delta._creso

    elif isinstance(delta, _Timedelta):
        src_value = delta._value
        src_reso = delta._creso

    elif is_timedelta64_object(delta):
        src_reso = get_datetime64_unit(delta)
        if (
            src_reso == NPY_DATETIMEUNIT.NPY_FR_Y
            or src_reso == NPY_DATETIMEUNIT.NPY_FR_M
        ):
            raise ValueError(
                "delta_to_nanoseconds does not support Y or M units, "
                "as their duration in nanoseconds is ambiguous."
            )
        src_value = get_timedelta64_value(delta)

    elif PyDelta_Check(delta):
        # A stdlib timedelta is flattened to a total number of microseconds.
        src_reso = NPY_DATETIMEUNIT.NPY_FR_us
        try:
            src_value = (
                delta.days * 86_400_000_000
                + delta.seconds * 1_000_000
                + delta.microseconds
            )
        except OverflowError as err:
            raise OutOfBoundsTimedelta(*err.args) from err

    else:
        raise TypeError(type(delta))

    # Step 2: rescale from the source unit to the requested one.
    try:
        return convert_reso(src_value, src_reso, reso, round_ok=round_ok)
    except (OutOfBoundsDatetime, OverflowError) as err:
        # Catch OutOfBoundsDatetime bc convert_reso can call check_dts_bounds
        #  for Y/M-resolution cases
        unit_str = npy_unit_to_abbrev(reso)
        raise OutOfBoundsTimedelta(
            f"Cannot cast {str(delta)} to unit={unit_str} without overflow."
        ) from err
@cython.overflowcheck(True)
cdef object ensure_td64ns(object ts):
    """
    Overflow-checked equivalent of ``td64.astype("m8[ns]")``.

    Parameters
    ----------
    ts : np.timedelta64

    Returns
    -------
    np.timedelta64[ns]
        ``ts`` itself is returned untouched when its unit is already
        nanoseconds or generic.

    Raises
    ------
    OutOfBoundsTimedelta
        If scaling the value to nanoseconds overflows int64.
    """
    cdef:
        NPY_DATETIMEUNIT src_unit
        int64_t scaled, factor
        str abbrev

    src_unit = get_datetime64_unit(ts)

    if (
        src_unit == NPY_DATETIMEUNIT.NPY_FR_ns
        or src_unit == NPY_DATETIMEUNIT.NPY_FR_GENERIC
    ):
        # nothing to rescale
        return ts

    abbrev = npy_unit_to_abbrev(src_unit)

    scaled = get_timedelta64_value(ts)

    factor = precision_from_unit(abbrev)[0]
    try:
        # NB: cython#1381 this cannot be *=
        scaled = scaled * factor
    except OverflowError as err:
        raise OutOfBoundsTimedelta(ts) from err

    return np.timedelta64(scaled, "ns")
cdef convert_to_timedelta64(object ts, str unit):
    """
    Coerce a scalar to a nanosecond-resolution np.timedelta64.

    ``unit`` must already be standardized by the caller, which avoids
    converting the unit repeatedly.

    Accepted inputs:
        - None/NaT (anything ``checknull_with_nat`` accepts)
        - Timedelta
        - np.timedelta64
        - integers and floats (interpreted in ``unit``)
        - strings, either ISO 8601 durations or the regular format
        - offsets (Tick objects)
        - datetime.timedelta

    Returns
    -------
    np.timedelta64[ns]

    Raises
    ------
    TypeError
        If ``ts`` is none of the accepted types.
    """
    # Caller is responsible for checking unit not in ["Y", "y", "M"]
    if checknull_with_nat(ts):
        return np.timedelta64(NPY_NAT, "ns")

    if isinstance(ts, _Timedelta):
        # already in the proper format
        if ts._creso == NPY_FR_ns:
            ts = np.timedelta64(ts._value, "ns")
        else:
            ts = ts.as_unit("ns").asm8
    elif is_timedelta64_object(ts):
        ts = ensure_td64ns(ts)
    elif is_integer_object(ts):
        if ts == NPY_NAT:
            return np.timedelta64(NPY_NAT, "ns")
        ts = _maybe_cast_from_unit(ts, unit)
    elif is_float_object(ts):
        ts = _maybe_cast_from_unit(ts, unit)
    elif isinstance(ts, str):
        # ISO 8601 durations start with "P", optionally preceded by a sign
        if ts.startswith(("P", "-P")):
            nanos = parse_iso_format_string(ts)
        else:
            nanos = parse_timedelta_string(ts)
        ts = np.timedelta64(nanos, "ns")
    elif is_tick_object(ts):
        ts = np.timedelta64(ts.nanos, "ns")

    # Whatever was not converted above is either a stdlib timedelta or invalid.
    if PyDelta_Check(ts):
        ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
    elif not is_timedelta64_object(ts):
        raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
    return ts.astype("timedelta64[ns]")
cdef _maybe_cast_from_unit(ts, str unit):
    """
    Interpret the number ``ts`` in ``unit`` and return it as a
    np.timedelta64 with "ns" resolution.

    Raises
    ------
    OutOfBoundsTimedelta
        If ``cast_from_unit`` raises OutOfBoundsDatetime.
    """
    # caller is responsible for checking
    #  assert unit not in ["Y", "y", "M"]
    try:
        nanos = cast_from_unit(ts, unit)
    except OutOfBoundsDatetime as err:
        raise OutOfBoundsTimedelta(
            f"Cannot cast {ts} from {unit} to 'ns' without overflow."
        ) from err

    return np.timedelta64(nanos, "ns")
- @cython.boundscheck(False)
- @cython.wraparound(False)
- def array_to_timedelta64(
- ndarray values, str unit=None, str errors="raise"
- ) -> ndarray:
- # values is object-dtype, may be 2D
- """
- Convert an ndarray to an array of timedeltas. If errors == 'coerce',
- coerce non-convertible objects to NaT. Otherwise, raise.
- Parameters
- ----------
- values : ndarray[object]
- unit : str, optional
- Unit applied by the fallback conversion; "ns" is used when None.
- Must not be given when ``values`` contains a str, unless
- errors == 'coerce'.
- errors : {"ignore", "raise", "coerce"}, default "raise"
- Returns
- -------
- np.ndarray[timedelta64ns]
- Same shape as ``values``.
- Raises
- ------
- TypeError
- If ``values`` does not have object dtype.
- ValueError
- If ``errors`` is not a recognized option, or if ``unit`` is given
- together with str input while errors != 'coerce'.
- """
- # Caller is responsible for checking
- assert unit not in ["Y", "y", "M"]
- cdef:
- Py_ssize_t i, n = values.size
- ndarray result = np.empty((<object>values).shape, dtype="m8[ns]")
- object item
- int64_t ival
- # mi walks `result` (index 0) and `values` (index 1) in lockstep
- cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
- cnp.flatiter it
- if values.descr.type_num != cnp.NPY_OBJECT:
- # raise here otherwise we segfault below
- raise TypeError("array_to_timedelta64 'values' must have object dtype")
- if errors not in {"ignore", "raise", "coerce"}:
- raise ValueError("errors must be one of {'ignore', 'raise', or 'coerce'}")
- if unit is not None and errors != "coerce":
- # pre-scan: reject any str element when an explicit unit was given
- it = cnp.PyArray_IterNew(values)
- for i in range(n):
- # Analogous to: item = values[i]
- item = cnp.PyArray_GETITEM(values, cnp.PyArray_ITER_DATA(it))
- if isinstance(item, str):
- raise ValueError(
- "unit must not be specified if the input contains a str"
- )
- cnp.PyArray_ITER_NEXT(it)
- # Usually, we have all strings. If so, we hit the fast path.
- # If this path fails, we try conversion a different way, and
- # this is where all of the error handling will take place.
- try:
- for i in range(n):
- # Analogous to: item = values[i]
- item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
- ival = _item_to_timedelta64_fastpath(item)
- # Analogous to: iresult[i] = ival
- (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
- cnp.PyArray_MultiIter_NEXT(mi)
- except (TypeError, ValueError):
- # fast path failed part-way: rewind and redo every element with the
- # general converter
- cnp.PyArray_MultiIter_RESET(mi)
- parsed_unit = parse_timedelta_unit(unit or "ns")
- for i in range(n):
- item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
- ival = _item_to_timedelta64(item, parsed_unit, errors)
- (<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
- cnp.PyArray_MultiIter_NEXT(mi)
- return result
cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1:
    """
    Fast-path conversion of a single element; see array_to_timedelta64.

    Returns NPY_NAT for NaT, otherwise whatever ``parse_timedelta_string``
    returns for ``item``.
    """
    if item is not NaT:
        return parse_timedelta_string(item)

    # we allow this check in the fast-path because NaT is a C-object
    #  so this is an inexpensive check
    return NPY_NAT
cdef int64_t _item_to_timedelta64(
    object item,
    str parsed_unit,
    str errors
) except? -1:
    """
    General conversion of a single element; see array_to_timedelta64.

    A ValueError from the conversion becomes NPY_NAT when
    ``errors == "coerce"`` and is otherwise propagated. The
    "unit abbreviation w/o a number" error is re-raised with a message that
    names the offending item.
    """
    try:
        return get_timedelta64_value(convert_to_timedelta64(item, parsed_unit))
    except ValueError as err:
        if errors == "coerce":
            return NPY_NAT
        if "unit abbreviation w/o a number" not in str(err):
            raise
        # re-raise with more pertinent message
        msg = f"Could not convert '{item}' to NumPy timedelta"
        raise ValueError(msg) from err
- @cython.cpow(True)
- cdef int64_t parse_timedelta_string(str ts) except? -1:
- """
- Parse a regular format timedelta string. Return an int64_t (in ns)
- or raise a ValueError on an invalid parse.
- An empty string, or a string found in nat_strings, yields NPY_NAT.
- The string is scanned once, character by character; digits, fraction
- digits and unit letters are buffered and converted whenever a complete
- "<number><unit>" or "hh:mm:ss" piece has been read.
- """
- cdef:
- unicode c
- bint neg = 0, have_dot = 0, have_value = 0, have_hhmmss = 0
- object current_unit = None
- int64_t result = 0, m = 0, r
- list number = [], frac = [], unit = []
- # neg : tracks if we have a leading negative for the value
- # have_dot : tracks if we are processing a dot (either post hhmmss or
- # inside an expression)
- # have_value : track if we have at least 1 leading unit
- # have_hhmmss : tracks if we have a regular format hh:mm:ss
- # current_unit : last field consumed while reading hh:mm:ss (None, "h",
- # "m" or "s"); m is the matching multiplier in ns
- if len(ts) == 0 or ts in nat_strings:
- return NPY_NAT
- for c in ts:
- # skip whitespace / commas
- if c == " " or c == ",":
- pass
- # positive signs are ignored
- elif c == "+":
- pass
- # neg
- elif c == "-":
- if neg or have_value or have_hhmmss:
- raise ValueError("only leading negative signs are allowed")
- neg = 1
- # number (ascii codes)
- elif ord(c) >= 48 and ord(c) <= 57:
- if have_dot:
- # we found a dot, but now its just a fraction
- if len(unit):
- number.append(c)
- have_dot = 0
- else:
- frac.append(c)
- elif not len(unit):
- number.append(c)
- else:
- # a digit right after a unit: flush the finished piece and
- # start buffering the next number
- r = timedelta_from_spec(number, frac, unit)
- unit, number, frac = [], [c], []
- result += timedelta_as_neg(r, neg)
- # hh:mm:ss.
- elif c == ":":
- # we flip this off if we have a leading value
- if have_value:
- neg = 0
- # we are in the pattern hh:mm:ss pattern
- if len(number):
- if current_unit is None:
- current_unit = "h"
- m = 1000000000 * 3600
- elif current_unit == "h":
- current_unit = "m"
- m = 1000000000 * 60
- elif current_unit == "m":
- current_unit = "s"
- m = 1000000000
- r = <int64_t>int("".join(number)) * m
- result += timedelta_as_neg(r, neg)
- have_hhmmss = 1
- else:
- raise ValueError(f"expecting hh:mm:ss format, received: {ts}")
- unit, number = [], []
- # after the decimal point
- elif c == ".":
- if len(number) and current_unit is not None:
- # by definition we had something like
- # so we need to evaluate the final field from a
- # hh:mm:ss (so current_unit is 'm')
- if current_unit != "m":
- raise ValueError("expected hh:mm:ss format before .")
- m = 1000000000
- r = <int64_t>int("".join(number)) * m
- result += timedelta_as_neg(r, neg)
- have_value = 1
- unit, number, frac = [], [], []
- have_dot = 1
- # unit
- else:
- unit.append(c)
- have_value = 1
- have_dot = 0
- # End of input: convert whatever is still buffered.
- # we had a dot, but we have a fractional
- # value since we have an unit
- if have_dot and len(unit):
- r = timedelta_from_spec(number, frac, unit)
- result += timedelta_as_neg(r, neg)
- # we have a dot as part of a regular format
- # e.g. hh:mm:ss.fffffff
- elif have_dot:
- if ((len(number) or len(frac)) and not len(unit)
- and current_unit is None):
- raise ValueError("no units specified")
- # scale the fraction digits to nanoseconds; anything beyond nine
- # digits is truncated
- if len(frac) > 0 and len(frac) <= 3:
- m = 10**(3 -len(frac)) * 1000 * 1000
- elif len(frac) > 3 and len(frac) <= 6:
- m = 10**(6 -len(frac)) * 1000
- elif len(frac) > 6 and len(frac) <= 9:
- m = 10**(9 -len(frac))
- else:
- m = 1
- frac = frac[:9]
- r = <int64_t>int("".join(frac)) * m
- result += timedelta_as_neg(r, neg)
- # we have a regular format
- # we must have seconds at this point (hence the unit is still 'm')
- elif current_unit is not None:
- if current_unit != "m":
- raise ValueError("expected hh:mm:ss format")
- m = 1000000000
- r = <int64_t>int("".join(number)) * m
- result += timedelta_as_neg(r, neg)
- # we have a last abbreviation
- elif len(unit):
- if len(number):
- r = timedelta_from_spec(number, frac, unit)
- result += timedelta_as_neg(r, neg)
- else:
- raise ValueError("unit abbreviation w/o a number")
- # we only have symbols and no numbers
- elif len(number) == 0:
- raise ValueError("symbols w/o a number")
- # treat as nanoseconds
- # but only if we don't have anything else
- else:
- if have_value:
- raise ValueError("have leftover units")
- if len(number):
- r = timedelta_from_spec(number, frac, "ns")
- result += timedelta_as_neg(r, neg)
- return result
cdef int64_t timedelta_as_neg(int64_t value, bint neg):
    """
    Apply an optional sign flip to a timedelta value.

    Parameters
    ----------
    value : int64_t of the timedelta value
    neg : bool, True if the value is to be negated

    Returns
    -------
    int64_t
        ``-value`` when ``neg`` is set, otherwise ``value`` unchanged.
    """
    return -value if neg else value
cdef timedelta_from_spec(object number, object frac, object unit):
    """
    Combine parsed "<number>.<frac><unit>" pieces into one value by handing
    the float and the canonical unit to ``cast_from_unit``.

    Parameters
    ----------
    number : a list of number digits
    frac : a list of frac digits
    unit : a list of unit characters

    Raises
    ------
    ValueError
        If the unit is "M", "Y" or "y", or is not a known abbreviation.
    """
    cdef:
        str numeric_text

    unit_name = "".join(unit)
    if unit_name in ("M", "Y", "y"):
        raise ValueError(
            "Units 'M', 'Y' and 'y' do not represent unambiguous timedelta "
            "values and are not supported."
        )

    unit_name = parse_timedelta_unit(unit_name)

    # Rebuild the decimal literal, e.g. ["1", "5"] and ["2"] give "15.2".
    numeric_text = "".join(number) + "." + "".join(frac)
    return cast_from_unit(float(numeric_text), unit_name)
cpdef inline str parse_timedelta_unit(str unit):
    """
    Translate a unit alias into its canonical abbreviation.

    Parameters
    ----------
    unit : str or None
        None is treated as "ns". "M" is returned as-is; every other value is
        lower-cased and looked up in ``timedelta_abbrevs``.

    Returns
    -------
    str
        Canonical unit string.

    Raises
    ------
    ValueError : on non-parseable input
    """
    if unit is None:
        return "ns"
    if unit == "M":
        # returned untouched, before the lower-cased lookup
        return unit

    try:
        return timedelta_abbrevs[unit.lower()]
    except KeyError:
        raise ValueError(f"invalid unit abbreviation: {unit}")
- # ----------------------------------------------------------------------
- # Timedelta ops utilities
cdef bint _validate_ops_compat(other):
    """
    Return True if ``other`` is something Timedelta arithmetic can operate
    with: a null/NaT value, any timedelta scalar, or a string.
    """
    return (
        checknull_with_nat(other)
        or is_any_td_scalar(other)
        or isinstance(other, str)
    )
def _op_unary_method(func, name):
    """
    Build a unary method named ``name`` that applies ``func`` to the
    integer ``_value`` and wraps the result in a Timedelta with the same
    ``_creso``.
    """
    def f(self):
        result = func(self._value)
        return _timedelta_from_value_and_reso(Timedelta, result, self._creso)

    f.__name__ = name
    return f
- def _binary_op_method_timedeltalike(op, name):
- # define a binary operation that only works if the other argument is
- # timedelta like or an array of timedeltalike
- #
- # `op` is the two-argument operation to apply and `name` becomes the
- # __name__ of the returned method. The returned method answers
- # NotImplemented for operands it does not handle.
- def f(self, other):
- if other is NaT:
- return NaT
- elif is_datetime64_object(other) or (
- PyDateTime_Check(other) and not isinstance(other, ABCTimestamp)
- ):
- # this case is for a datetime object that is specifically
- # *not* a Timestamp, as the Timestamp case will be
- # handled after `_validate_ops_compat` returns False below
- from pandas._libs.tslibs.timestamps import Timestamp
- return op(self, Timestamp(other))
- # We are implicitly requiring the canonical behavior to be
- # defined by Timestamp methods.
- elif is_array(other):
- if other.ndim == 0:
- # see also: item_from_zerodim
- # unwrap the 0-dim array and dispatch again on the scalar
- item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
- return f(self, item)
- elif other.dtype.kind in ["m", "M"]:
- return op(self.to_timedelta64(), other)
- elif other.dtype.kind == "O":
- # object dtype: apply the operation element by element
- return np.array([op(self, x) for x in other])
- else:
- return NotImplemented
- elif not _validate_ops_compat(other):
- # Includes any of our non-cython classes
- return NotImplemented
- try:
- other = Timedelta(other)
- except ValueError:
- # failed to parse as timedelta
- return NotImplemented
- if other is NaT:
- # e.g. if original other was timedelta64('NaT')
- return NaT
- # Matching numpy, we cast to the higher resolution. Unlike numpy,
- # we raise instead of silently overflowing during this casting.
- if self._creso < other._creso:
- self = (<_Timedelta>self)._as_creso(other._creso, round_ok=True)
- elif self._creso > other._creso:
- other = (<_Timedelta>other)._as_creso(self._creso, round_ok=True)
- # both operands now share one resolution; operate on the raw integers
- res = op(self._value, other._value)
- if res == NPY_NAT:
- # e.g. test_implementation_limits
- # TODO: more generally could do an overflowcheck in op?
- return NaT
- return _timedelta_from_value_and_reso(Timedelta, res, reso=self._creso)
- f.__name__ = name
- return f
- # ----------------------------------------------------------------------
- # Timedelta Construction
- cdef int64_t parse_iso_format_string(str ts) except? -1:
- """
- Parse an ISO 8601 duration string, character by character, and return
- the total it represents in nanoseconds.
- Parameters
- ----------
- ts: str
- ISO 8601 Duration formatted string
- NOTE(review): ``ts[0]`` is read unconditionally, so this assumes a
- non-empty string -- confirm that all callers guarantee it.
- Returns
- -------
- ns: int64_t
- Precision in nanoseconds of matched ISO 8601 duration
- Raises
- ------
- ValueError
- If ``ts`` cannot be parsed
- """
- cdef:
- unicode c
- int64_t result = 0, r
- int p = 0, sign = 1
- object dec_unit = "ms", err_msg
- bint have_dot = 0, have_value = 0, neg = 0
- list number = [], unit = []
- # sign : overall sign taken from a leading "-" before the "P"
- # neg : sign of the component currently being read
- # p : number of fraction digits in the current group of up to three
- # dec_unit : unit of the current fraction group ("ms", then "us", then "ns")
- err_msg = f"Invalid ISO 8601 Duration format - {ts}"
- if ts[0] == "-":
- sign = -1
- ts = ts[1:]
- for c in ts:
- # number (ascii codes)
- if 48 <= ord(c) <= 57:
- have_value = 1
- if have_dot:
- # after three fraction digits, close the group and move on
- # to the next finer unit
- if p == 3 and dec_unit != "ns":
- unit.append(dec_unit)
- if dec_unit == "ms":
- dec_unit = "us"
- elif dec_unit == "us":
- dec_unit = "ns"
- p = 0
- p += 1
- if not len(unit):
- number.append(c)
- else:
- r = timedelta_from_spec(number, "0", unit)
- result += timedelta_as_neg(r, neg)
- neg = 0
- unit, number = [], [c]
- else:
- if c == "P" or c == "T":
- pass # ignore marking characters P and T
- elif c == "-":
- if neg or have_value:
- raise ValueError(err_msg)
- else:
- neg = 1
- elif c == "+":
- pass
- elif c in ["W", "D", "H", "M"]:
- if c in ["H", "M"] and len(number) > 2:
- raise ValueError(err_msg)
- # "M" is passed on as the unit "min" (minutes)
- if c == "M":
- c = "min"
- unit.append(c)
- r = timedelta_from_spec(number, "0", unit)
- result += timedelta_as_neg(r, neg)
- neg = 0
- unit, number = [], []
- elif c == ".":
- # append any seconds
- if len(number):
- r = timedelta_from_spec(number, "0", "S")
- result += timedelta_as_neg(r, neg)
- unit, number = [], []
- have_dot = 1
- elif c == "S":
- if have_dot: # ms, us, or ns
- if not len(number) or p > 3:
- raise ValueError(err_msg)
- # pad to 3 digits as required
- pad = 3 - p
- while pad > 0:
- number.append("0")
- pad -= 1
- r = timedelta_from_spec(number, "0", dec_unit)
- result += timedelta_as_neg(r, neg)
- else: # seconds
- r = timedelta_from_spec(number, "0", "S")
- result += timedelta_as_neg(r, neg)
- else:
- raise ValueError(err_msg)
- if not have_value:
- # Received string only - never parsed any values
- raise ValueError(err_msg)
- return sign*result
cdef _to_py_int_float(v):
    """
    Convert an integer-like or float-like scalar to a builtin int or float.

    Raises
    ------
    TypeError
        If ``v`` is neither an integer object nor a float object.
    """
    # Note: This used to be defined inside Timedelta.__new__
    # but cython will not allow `cdef` functions to be defined dynamically.
    if is_integer_object(v):
        return int(v)
    if is_float_object(v):
        return float(v)
    raise TypeError(f"Invalid type {type(v)}. Must be int or float.")
def _timedelta_unpickle(value, reso):
    """
    Rebuild a Timedelta from its integer ``value`` and resolution ``reso``.
    """
    return _timedelta_from_value_and_reso(Timedelta, value, reso=reso)
cdef _timedelta_from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
    """
    Create an instance of ``cls`` holding ``value`` at resolution ``reso``.

    Only the 's', 'ms', 'us' and 'ns' resolutions are supported; any other
    raises NotImplementedError. ``value`` must not be NPY_NAT.
    """
    # Could make this a classmethod if/when cython supports cdef classmethods
    cdef:
        _Timedelta td_base

    assert value != NPY_NAT

    # For millisecond and second resos, we cannot actually pass int(value) because
    #  many cases would fall outside of the pytimedelta implementation bounds.
    #  We pass 0 instead, and override seconds, microseconds, days.
    #  In principle we could pass 0 for ns and us too.
    if reso == NPY_DATETIMEUNIT.NPY_FR_s:
        td_base = _Timedelta.__new__(cls, seconds=0)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_ms:
        td_base = _Timedelta.__new__(cls, milliseconds=0)
    elif reso == NPY_DATETIMEUNIT.NPY_FR_us:
        td_base = _Timedelta.__new__(cls, microseconds=int(value))
    elif reso == NPY_FR_ns:
        td_base = _Timedelta.__new__(cls, microseconds=int(value) // 1000)
    else:
        # Coarser resolutions (m, h, D) are disabled, but could potentially
        # be implemented here.
        raise NotImplementedError(
            "Only resolutions 's', 'ms', 'us', 'ns' are supported."
        )

    # The real payload is kept in the extension-type fields.
    td_base._value = value
    td_base._is_populated = 0
    td_base._creso = reso
    return td_base
class MinMaxReso:
    """
    Descriptor providing ``min``, ``max`` and ``resolution`` on both the
    Timedelta class and Timedelta instances.

    On an instance the result uses the instance's ``_creso``; on the class
    it falls back to what ``Timedelta(val)`` produces (nanoseconds).
    """

    def __init__(self, name):
        # one of "min", "max" or "resolution"
        self._name = name

    def __get__(self, obj, type=None):
        name = self._name
        if name == "min":
            # one above the int64 minimum
            val = np.iinfo(np.int64).min + 1
        elif name == "max":
            val = np.iinfo(np.int64).max
        else:
            assert name == "resolution"
            val = 1

        if obj is not None:
            return Timedelta._from_value_and_reso(val, obj._creso)
        # i.e. this is on the class, default to nanos
        return Timedelta(val)

    def __set__(self, obj, value):
        raise AttributeError(f"{self._name} is not settable.")
# Similar to Timestamp/datetime, this is a construction requirement for
# timedeltas that we need to do object instantiation in python. This will
# serve as a C extension type that shadows the Python class, where we do any
# heavy lifting.
cdef class _Timedelta(timedelta):
    # cdef readonly:
    #    int64_t value      # nanoseconds
    #    bint _is_populated  # are my components populated
    #    int64_t _d, _h, _m, _s, _ms, _us, _ns
    #    NPY_DATETIMEUNIT _reso

    # higher than np.ndarray and np.matrix
    __array_priority__ = 100

    min = MinMaxReso("min")
    max = MinMaxReso("max")
    resolution = MinMaxReso("resolution")

    @property
    def value(self):
        """
        Return the stored value converted to nanoseconds.

        Raises
        ------
        OverflowError
            If the conversion to nanoseconds overflows.
        """
        try:
            return convert_reso(self._value, self._creso, NPY_FR_ns, False)
        except OverflowError:
            raise OverflowError(
                "Cannot convert Timedelta to nanoseconds without overflow. "
                "Use `.asm8.view('i8')` to cast represent Timedelta in its own "
                f"unit (here, {self.unit})."
            )

    @property
    def _unit(self) -> str:
        """
        The abbreviation associated with self._creso.
        """
        return npy_unit_to_abbrev(self._creso)

    @property
    def days(self) -> int:  # TODO(cython3): make cdef property
        """
        Returns the days of the timedelta.

        Returns
        -------
        int

        Examples
        --------
        >>> td = pd.Timedelta(1, "d")
        >>> td.days
        1

        >>> td = pd.Timedelta('4 min 3 us 42 ns')
        >>> td.days
        0
        """
        # NB: using the python C-API PyDateTime_DELTA_GET_DAYS will fail
        #  (or be incorrect)
        self._ensure_components()
        return self._d

    @property
    def seconds(self) -> int:  # TODO(cython3): make cdef property
        """
        Return the total hours, minutes, and seconds of the timedelta as seconds.

        Timedelta.seconds = hours * 3600 + minutes * 60 + seconds.

        Returns
        -------
        int
            Number of seconds.

        See Also
        --------
        Timedelta.components : Return all attributes with assigned values
            (i.e. days, hours, minutes, seconds, milliseconds, microseconds,
            nanoseconds).

        Examples
        --------
        **Using string input**

        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.seconds
        120

        **Using integer input**

        >>> td = pd.Timedelta(42, unit='s')
        >>> td.seconds
        42
        """
        # NB: using the python C-API PyDateTime_DELTA_GET_SECONDS will fail
        #  (or be incorrect)
        self._ensure_components()
        return self._h * 3600 + self._m * 60 + self._s

    @property
    def microseconds(self) -> int:  # TODO(cython3): make cdef property
        """
        Return the millisecond and microsecond components as microseconds.

        Timedelta.microseconds = milliseconds * 1000 + microseconds.
        """
        # NB: using the python C-API PyDateTime_DELTA_GET_MICROSECONDS will fail
        #  (or be incorrect)
        self._ensure_components()
        return self._ms * 1000 + self._us

    def total_seconds(self) -> float:
        """Total seconds in the duration."""
        # We need to override bc we overrode days/seconds/microseconds
        # TODO: add nanos/1e9?
        return self.days * 24 * 3600 + self.seconds + self.microseconds / 1_000_000

    @property
    def unit(self) -> str:
        """
        The abbreviation of the resolution this Timedelta is stored in.
        """
        return npy_unit_to_abbrev(self._creso)

    def __hash__(_Timedelta self):
        if self._has_ns():
            # Note: this does *not* satisfy the invariance
            #  td1 == td2 \\Rightarrow hash(td1) == hash(td2)
            #  if td1 and td2 have different _resos. timedelta64 also has this
            #  non-invariant behavior.
            #  see GH#44504
            return hash(self._value)
        elif self._is_in_pytimedelta_bounds() and (
            self._creso == NPY_FR_ns or self._creso == NPY_DATETIMEUNIT.NPY_FR_us
        ):
            # If we can defer to timedelta.__hash__, do so, as that
            #  ensures the hash is invariant to our _reso.
            # We can only defer for ns and us, as for these two resos we
            #  call _Timedelta.__new__ with the correct input in
            #  _timedelta_from_value_and_reso; so timedelta.__hash__
            #  will be correct
            return timedelta.__hash__(self)
        else:
            # We want to ensure that two equivalent Timedelta objects
            #  have the same hash.  So we try downcasting to the next-lowest
            #  resolution.
            try:
                obj = (<_Timedelta>self)._as_creso(<NPY_DATETIMEUNIT>(self._creso + 1))
            except OverflowError:
                # Doesn't fit, so we're off the hook
                return hash(self._value)
            else:
                return hash(obj)

    def __richcmp__(_Timedelta self, object other, int op):
        cdef:
            _Timedelta ots

        if isinstance(other, _Timedelta):
            ots = other
        elif is_any_td_scalar(other):
            try:
                ots = Timedelta(other)
            except OutOfBoundsTimedelta:
                # GH#49021 pytimedelta.max overflows
                if not PyDelta_Check(other):
                    # TODO: handle this case
                    raise
                # Fall back to comparing component tuples; a pytimedelta
                #  carries no nanoseconds, hence the trailing 0.
                ltup = (self.days, self.seconds, self.microseconds, self.nanoseconds)
                rtup = (other.days, other.seconds, other.microseconds, 0)
                # Dispatch on `op` through the generic comparison, exactly
                #  as ltup <op> rtup would.
                return PyObject_RichCompare(ltup, rtup, op)

        elif other is NaT:
            # Only "!=" holds when comparing against NaT
            return op == Py_NE

        elif util.is_array(other):
            if other.dtype.kind == "m":
                return PyObject_RichCompare(self.asm8, other, op)
            elif other.dtype.kind == "O":
                # operate element-wise
                return np.array(
                    [PyObject_RichCompare(self, x, op) for x in other],
                    dtype=bool,
                )
            if op == Py_EQ:
                return np.zeros(other.shape, dtype=bool)
            elif op == Py_NE:
                return np.ones(other.shape, dtype=bool)
            return NotImplemented  # let other raise TypeError

        else:
            return NotImplemented

        if self._creso == ots._creso:
            return cmp_scalar(self._value, ots._value, op)
        return self._compare_mismatched_resos(ots, op)

    # TODO: re-use/share with Timestamp
    cdef bint _compare_mismatched_resos(self, _Timedelta other, op):
        # Can't just dispatch to numpy as they silently overflow and get it wrong
        cdef:
            npy_datetimestruct dts_self
            npy_datetimestruct dts_other

        # dispatch to the datetimestruct utils instead of writing new ones!
        pandas_datetime_to_datetimestruct(self._value, self._creso, &dts_self)
        pandas_datetime_to_datetimestruct(other._value, other._creso, &dts_other)
        return cmp_dtstructs(&dts_self, &dts_other, op)

    cdef bint _has_ns(self):
        if self._creso == NPY_FR_ns:
            return self._value % 1000 != 0
        elif self._creso < NPY_FR_ns:
            # i.e. seconds, millisecond, microsecond
            return False
        else:
            raise NotImplementedError(self._creso)

    cdef bint _is_in_pytimedelta_bounds(self):
        """
        Check if we are within the bounds of datetime.timedelta.
        """
        self._ensure_components()
        return -999999999 <= self._d and self._d <= 999999999

    cdef _ensure_components(_Timedelta self):
        """
        compute the components
        """
        if self._is_populated:
            return

        cdef:
            pandas_timedeltastruct tds

        pandas_timedelta_to_timedeltastruct(self._value, self._creso, &tds)
        self._d = tds.days
        self._h = tds.hrs
        self._m = tds.min
        self._s = tds.sec
        self._ms = tds.ms
        self._us = tds.us
        self._ns = tds.ns
        self._seconds = tds.seconds
        self._microseconds = tds.microseconds

        self._is_populated = 1

    cpdef timedelta to_pytimedelta(_Timedelta self):
        """
        Convert a pandas Timedelta object into a python ``datetime.timedelta`` object.

        Returns
        -------
        datetime.timedelta

        See Also
        --------
        to_timedelta : Convert argument to Timedelta type.

        Notes
        -----
        Any nanosecond resolution will be lost.
        """
        if self._creso == NPY_FR_ns:
            return timedelta(microseconds=int(self._value) / 1000)

        # TODO(@WillAyd): is this the right way to use components?
        self._ensure_components()
        return timedelta(
            days=self._d, seconds=self._seconds, microseconds=self._microseconds
        )

    def to_timedelta64(self) -> np.timedelta64:
        """
        Return a numpy.timedelta64 object in the unit of this Timedelta.
        """
        cdef:
            str abbrev = npy_unit_to_abbrev(self._creso)
        # TODO: way to create a np.timedelta64 obj with the reso directly
        #  instead of having to get the abbrev?
        return np.timedelta64(self._value, abbrev)

    def to_numpy(self, dtype=None, copy=False) -> np.timedelta64:
        """
        Convert the Timedelta to a NumPy timedelta64.

        This is an alias method for `Timedelta.to_timedelta64()`. The dtype and
        copy parameters are available here only for compatibility; they must
        be left at their defaults.

        Returns
        -------
        numpy.timedelta64

        Raises
        ------
        ValueError
            If ``dtype`` is not None or ``copy`` is not False.

        See Also
        --------
        Series.to_numpy : Similar method for Series.
        """
        if dtype is not None or copy is not False:
            raise ValueError(
                "Timedelta.to_numpy dtype and copy arguments are ignored"
            )
        return self.to_timedelta64()

    def view(self, dtype):
        """
        Array view compatibility.

        Parameters
        ----------
        dtype : str or dtype
            The dtype to view the underlying data as.
        """
        return np.timedelta64(self._value).view(dtype)

    @property
    def components(self):
        """
        Return a components namedtuple-like.

        Examples
        --------
        >>> td = pd.Timedelta('2 day 4 min 3 us 42 ns')
        >>> td.components
        Components(days=2, hours=0, minutes=4, seconds=0, milliseconds=0,
            microseconds=3, nanoseconds=42)
        """
        self._ensure_components()
        # return the named tuple
        return Components(self._d, self._h, self._m, self._s,
                          self._ms, self._us, self._ns)

    @property
    def asm8(self) -> np.timedelta64:
        """
        Return a numpy timedelta64 array scalar view.

        Provides access to the array scalar view (i.e. a combination of the
        value and the units) associated with the numpy.timedelta64().view(),
        including a 64-bit integer representation of the timedelta
        (Python int compatible). Equivalent to calling ``to_timedelta64()``.

        Returns
        -------
        numpy timedelta64 array scalar view
            Array scalar view of the timedelta.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.asm8
        numpy.timedelta64(86520000003042,'ns')

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.asm8
        numpy.timedelta64(123000000000,'ns')

        >>> td = pd.Timedelta('3 ms 5 us')
        >>> td.asm8
        numpy.timedelta64(3005000,'ns')

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.asm8
        numpy.timedelta64(42,'ns')
        """
        return self.to_timedelta64()

    @property
    def resolution_string(self) -> str:
        """
        Return a string representing the lowest timedelta resolution.

        Each timedelta has a defined resolution that represents the lowest OR
        most granular level of precision. Each level of resolution is
        represented by a short string as defined below:

        Resolution:     Return value

        * Days:         'D'
        * Hours:        'H'
        * Minutes:      'T'
        * Seconds:      'S'
        * Milliseconds: 'L'
        * Microseconds: 'U'
        * Nanoseconds:  'N'

        Returns
        -------
        str
            Timedelta resolution.

        Examples
        --------
        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')
        >>> td.resolution_string
        'N'

        >>> td = pd.Timedelta('1 days 2 min 3 us')
        >>> td.resolution_string
        'U'

        >>> td = pd.Timedelta('2 min 3 s')
        >>> td.resolution_string
        'S'

        >>> td = pd.Timedelta(36, unit='us')
        >>> td.resolution_string
        'U'
        """
        self._ensure_components()
        # Report the finest component that is non-zero.
        if self._ns:
            return "N"
        elif self._us:
            return "U"
        elif self._ms:
            return "L"
        elif self._s:
            return "S"
        elif self._m:
            return "T"
        elif self._h:
            return "H"
        else:
            return "D"

    @property
    def nanoseconds(self):
        """
        Return the number of nanoseconds (n), where 0 <= n < 1 microsecond.

        Returns
        -------
        int
            Number of nanoseconds.

        See Also
        --------
        Timedelta.components : Return all attributes with assigned values
            (i.e. days, hours, minutes, seconds, milliseconds, microseconds,
            nanoseconds).

        Examples
        --------
        **Using string input**

        >>> td = pd.Timedelta('1 days 2 min 3 us 42 ns')

        >>> td.nanoseconds
        42

        **Using integer input**

        >>> td = pd.Timedelta(42, unit='ns')
        >>> td.nanoseconds
        42
        """
        self._ensure_components()
        return self._ns

    def _repr_base(self, format=None) -> str:
        """

        Parameters
        ----------
        format : None|all|sub_day|long

        Returns
        -------
        converted : string of a Timedelta

        """
        cdef:
            str sign, fmt
            dict comp_dict
            object subs

        self._ensure_components()

        # With negative days the sub-day part is shown with an explicit "+"
        if self._d < 0:
            sign = " +"
        else:
            sign = " "

        if format == "all":
            fmt = ("{days} days{sign}{hours:02}:{minutes:02}:{seconds:02}."
                   "{milliseconds:03}{microseconds:03}{nanoseconds:03}")
        else:
            # if we have a partial day
            subs = (self._h or self._m or self._s or
                    self._ms or self._us or self._ns)

            if self._ms or self._us or self._ns:
                seconds_fmt = "{seconds:02}.{milliseconds:03}{microseconds:03}"
                if self._ns:
                    # GH#9309
                    seconds_fmt += "{nanoseconds:03}"
            else:
                seconds_fmt = "{seconds:02}"

            if format == "sub_day" and not self._d:
                fmt = "{hours:02}:{minutes:02}:" + seconds_fmt
            elif subs or format == "long":
                fmt = "{days} days{sign}{hours:02}:{minutes:02}:" + seconds_fmt
            else:
                fmt = "{days} days"

        comp_dict = self.components._asdict()
        comp_dict["sign"] = sign

        return fmt.format(**comp_dict)

    def __repr__(self) -> str:
        repr_based = self._repr_base(format="long")
        return f"Timedelta('{repr_based}')"

    def __str__(self) -> str:
        return self._repr_base(format="long")

    def __bool__(self) -> bool:
        return self._value != 0

    def isoformat(self) -> str:
        """
        Format the Timedelta as ISO 8601 Duration.

        ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the
        values. See https://en.wikipedia.org/wiki/ISO_8601#Durations.

        Returns
        -------
        str

        See Also
        --------
        Timestamp.isoformat : Function is used to convert the given
            Timestamp object into the ISO format.

        Notes
        -----
        The longest component is days, whose value may be larger than
        365.
        Every component is always included, even if its value is 0.
        Pandas uses nanosecond precision, so up to 9 decimal places may
        be included in the seconds component.
        Trailing 0's are removed from the seconds component after the decimal.
        We do not 0 pad components, so it's `...T5H...`, not `...T05H...`

        Examples
        --------
        >>> td = pd.Timedelta(days=6, minutes=50, seconds=3,
        ...                   milliseconds=10, microseconds=10, nanoseconds=12)

        >>> td.isoformat()
        'P6DT0H50M3.010010012S'
        >>> pd.Timedelta(hours=1, seconds=10).isoformat()
        'P0DT1H0M10S'
        >>> pd.Timedelta(days=500.5).isoformat()
        'P500DT12H0M0S'
        """
        components = self.components
        seconds = (f"{components.seconds}."
                   f"{components.milliseconds:0>3}"
                   f"{components.microseconds:0>3}"
                   f"{components.nanoseconds:0>3}")
        # Trim unnecessary 0s, 1.000000000 -> 1
        seconds = seconds.rstrip("0").rstrip(".")
        tpl = (f"P{components.days}DT{components.hours}"
               f"H{components.minutes}M{seconds}S")
        return tpl

    # ----------------------------------------------------------------
    # Constructors

    @classmethod
    def _from_value_and_reso(cls, int64_t value, NPY_DATETIMEUNIT reso):
        # exposing as classmethod for testing
        return _timedelta_from_value_and_reso(cls, value, reso)

    def as_unit(self, str unit, bint round_ok=True):
        """
        Convert the underlying int64 representation to the given unit.

        Parameters
        ----------
        unit : {"ns", "us", "ms", "s"}
        round_ok : bool, default True
            If False and the conversion requires rounding, raise.

        Returns
        -------
        Timedelta
        """
        dtype = np.dtype(f"m8[{unit}]")
        reso = get_unit_from_dtype(dtype)
        return self._as_creso(reso, round_ok=round_ok)

    @cython.cdivision(False)
    cdef _Timedelta _as_creso(self, NPY_DATETIMEUNIT reso, bint round_ok=True):
        cdef:
            int64_t value

        if reso == self._creso:
            # Already in the requested resolution; nothing to convert.
            return self

        try:
            value = convert_reso(self._value, self._creso, reso, round_ok=round_ok)
        except OverflowError as err:
            unit = npy_unit_to_abbrev(reso)
            raise OutOfBoundsTimedelta(
                f"Cannot cast {self} to unit='{unit}' without overflow."
            ) from err

        return type(self)._from_value_and_reso(value, reso=reso)

    cpdef _maybe_cast_to_matching_resos(self, _Timedelta other):
        """
        If _resos do not match, cast to the higher resolution, raising on overflow.
        """
        if self._creso > other._creso:
            other = other._as_creso(self._creso)
        elif self._creso < other._creso:
            self = self._as_creso(other._creso)
        return self, other
# Python front end to C extension type _Timedelta
# This serves as the box for timedelta64


class Timedelta(_Timedelta):
    """
    Represents a duration, the difference between two dates or times.

    Timedelta is the pandas equivalent of python's ``datetime.timedelta``
    and is interchangeable with it in most cases.

    Parameters
    ----------
    value : Timedelta, timedelta, np.timedelta64, str, or int
    unit : str, default 'ns'
        Denote the unit of the input, if input is an integer.

        Possible values:

        * 'W', 'D', 'T', 'S', 'L', 'U', or 'N'
        * 'days' or 'day'
        * 'hours', 'hour', 'hr', or 'h'
        * 'minutes', 'minute', 'min', or 'm'
        * 'seconds', 'second', or 'sec'
        * 'milliseconds', 'millisecond', 'millis', or 'milli'
        * 'microseconds', 'microsecond', 'micros', or 'micro'
        * 'nanoseconds', 'nanosecond', 'nanos', 'nano', or 'ns'.

    **kwargs
        Available kwargs: {days, seconds, microseconds,
        milliseconds, minutes, hours, weeks}.
        Values for construction in compat with datetime.timedelta.
        Numpy ints and floats will be coerced to python ints and floats.

    Notes
    -----
    The constructor may take in either both values of value and unit or
    kwargs as above. Either one of them must be used during initialization

    The ``.value`` attribute is always in ns.

    If the precision is higher than nanoseconds, the precision of the duration is
    truncated to nanoseconds.

    Examples
    --------
    Here we initialize Timedelta object with both value and unit

    >>> td = pd.Timedelta(1, "d")
    >>> td
    Timedelta('1 days 00:00:00')

    Here we initialize the Timedelta object with kwargs

    >>> td2 = pd.Timedelta(days=1)
    >>> td2
    Timedelta('1 days 00:00:00')

    We see that either way we get the same result
    """

    _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds",
                           "milliseconds", "microseconds", "nanoseconds"}

    def __new__(cls, object value=_no_input, unit=None, **kwargs):
        if value is _no_input:
            if not kwargs:
                raise ValueError("cannot construct a Timedelta without a "
                                 "value/unit or descriptive keywords "
                                 "(days,seconds....)")

            kwargs = {key: _to_py_int_float(val) for key, val in kwargs.items()}

            unsupported_kwargs = set(kwargs)
            unsupported_kwargs.difference_update(cls._req_any_kwargs_new)
            if unsupported_kwargs or not cls._req_any_kwargs_new.intersection(kwargs):
                raise ValueError(
                    "cannot construct a Timedelta from the passed arguments, "
                    "allowed keywords are "
                    "[weeks, days, hours, minutes, seconds, "
                    "milliseconds, microseconds, nanoseconds]"
                )

            # GH43764, convert any input to nanoseconds first and then
            # create the timedelta. This ensures that any potential
            # nanosecond contributions from kwargs parsed as floats
            # are taken into consideration.
            seconds = int((
                (
                    (kwargs.get("days", 0) + kwargs.get("weeks", 0) * 7) * 24
                    + kwargs.get("hours", 0)
                ) * 3600
                + kwargs.get("minutes", 0) * 60
                + kwargs.get("seconds", 0)
                ) * 1_000_000_000
            )

            value = np.timedelta64(
                int(kwargs.get("nanoseconds", 0))
                + int(kwargs.get("microseconds", 0) * 1_000)
                + int(kwargs.get("milliseconds", 0) * 1_000_000)
                + seconds
            )

        if unit in {"Y", "y", "M"}:
            raise ValueError(
                "Units 'M', 'Y', and 'y' are no longer supported, as they do not "
                "represent unambiguous timedelta values durations."
            )

        # GH 30543 if pd.Timedelta already passed, return it
        # check that only value is passed
        if isinstance(value, _Timedelta):
            # 'unit' is benign in this case, but e.g. days or seconds
            #  doesn't make sense here.
            if kwargs:
                # GH#48898
                raise ValueError(
                    "Cannot pass both a Timedelta input and timedelta keyword "
                    "arguments, got "
                    f"{list(kwargs.keys())}"
                )
            return value
        elif isinstance(value, str):
            if unit is not None:
                raise ValueError("unit must not be specified if the value is a str")
            # Strings starting with "P" or "-P" are parsed as ISO 8601
            #  durations; everything else goes to the general parser.
            if value.startswith(("P", "-P")):
                value = parse_iso_format_string(value)
            else:
                value = parse_timedelta_string(value)
            value = np.timedelta64(value)
        elif PyDelta_Check(value):
            # pytimedelta object -> microsecond resolution
            new_value = delta_to_nanoseconds(
                value, reso=NPY_DATETIMEUNIT.NPY_FR_us
            )
            return cls._from_value_and_reso(
                new_value, reso=NPY_DATETIMEUNIT.NPY_FR_us
            )
        elif is_timedelta64_object(value):
            # Retain the resolution if possible, otherwise cast to the nearest
            #  supported resolution.
            new_value = get_timedelta64_value(value)
            if new_value == NPY_NAT:
                # i.e. np.timedelta64("NaT")
                return NaT

            reso = get_datetime64_unit(value)
            new_reso = get_supported_reso(reso)
            if reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
                try:
                    new_value = convert_reso(
                        new_value,
                        reso,
                        new_reso,
                        round_ok=True,
                    )
                except (OverflowError, OutOfBoundsDatetime) as err:
                    raise OutOfBoundsTimedelta(value) from err
            return cls._from_value_and_reso(new_value, reso=new_reso)

        elif is_tick_object(value):
            new_reso = get_supported_reso(value._creso)
            new_value = delta_to_nanoseconds(value, reso=new_reso)
            return cls._from_value_and_reso(new_value, reso=new_reso)

        elif is_integer_object(value) or is_float_object(value):
            # unit=None is de-facto 'ns'
            unit = parse_timedelta_unit(unit)
            value = convert_to_timedelta64(value, unit)
        elif checknull_with_nat(value):
            return NaT
        else:
            raise ValueError(
                "Value must be Timedelta, string, integer, "
                f"float, timedelta or convertible, not {type(value).__name__}"
            )

        # Branches that did not return above leave a timedelta64 in `value`;
        #  reduce it to its int64 payload.
        if is_timedelta64_object(value):
            value = value.view("i8")

        # nat
        if value == NPY_NAT:
            return NaT

        return _timedelta_from_value_and_reso(cls, value, NPY_FR_ns)

    def __setstate__(self, state):
        if len(state) == 1:
            # older pickle, only supported nanosecond
            value = state[0]
            reso = NPY_FR_ns
        else:
            value, reso = state
        self._value = value
        self._creso = reso

    def __reduce__(self):
        object_state = self._value, self._creso
        return (_timedelta_unpickle, object_state)

    @cython.cdivision(True)
    def _round(self, freq, mode):
        """
        Round the stored value to a multiple of ``freq`` using ``mode``.

        Shared implementation behind ``round``, ``floor`` and ``ceil``; the
        result keeps this object's resolution.
        """
        cdef:
            int64_t result, unit
            ndarray[int64_t] arr

        from pandas._libs.tslibs.offsets import to_offset

        offset = to_offset(freq)
        offset.nanos  # raises on non-fixed freq
        unit = delta_to_nanoseconds(offset, self._creso)

        arr = np.array([self._value], dtype="i8")
        result = round_nsint64(arr, mode, unit)[0]
        return Timedelta._from_value_and_reso(result, self._creso)

    def round(self, freq):
        """
        Round the Timedelta to the specified resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the rounding resolution.

        Returns
        -------
        a new Timedelta rounded to the given resolution of `freq`

        Raises
        ------
        ValueError if the freq cannot be converted
        """
        return self._round(freq, RoundTo.NEAREST_HALF_EVEN)

    def floor(self, freq):
        """
        Return a new Timedelta floored to this resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the flooring resolution.
        """
        return self._round(freq, RoundTo.MINUS_INFTY)

    def ceil(self, freq):
        """
        Return a new Timedelta ceiled to this resolution.

        Parameters
        ----------
        freq : str
            Frequency string indicating the ceiling resolution.
        """
        return self._round(freq, RoundTo.PLUS_INFTY)

    # ----------------------------------------------------------------
    # Arithmetic Methods
    # TODO: Can some of these be defined in the cython class?

    __neg__ = _op_unary_method(lambda x: -x, "__neg__")
    __pos__ = _op_unary_method(lambda x: x, "__pos__")
    __abs__ = _op_unary_method(lambda x: abs(x), "__abs__")

    __add__ = _binary_op_method_timedeltalike(lambda x, y: x + y, "__add__")
    __radd__ = _binary_op_method_timedeltalike(lambda x, y: x + y, "__radd__")
    __sub__ = _binary_op_method_timedeltalike(lambda x, y: x - y, "__sub__")
    __rsub__ = _binary_op_method_timedeltalike(lambda x, y: y - x, "__rsub__")

    def __mul__(self, other):
        if is_integer_object(other) or is_float_object(other):
            if util.is_nan(other):
                # np.nan * timedelta -> np.timedelta64("NaT"), in this case NaT
                return NaT

            return _timedelta_from_value_and_reso(
                Timedelta,
                <int64_t>(other * self._value),
                reso=self._creso,
            )

        elif is_array(other):
            if other.ndim == 0:
                # see also: item_from_zerodim
                item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
                return self.__mul__(item)
            return other * self.to_timedelta64()

        return NotImplemented

    __rmul__ = __mul__

    def __truediv__(self, other):
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            if other._creso != self._creso:
                self, other = self._maybe_cast_to_matching_resos(other)
            return self._value / float(other._value)

        elif is_integer_object(other) or is_float_object(other):
            # integers or floats
            if util.is_nan(other):
                return NaT
            return Timedelta._from_value_and_reso(
                <int64_t>(self._value / other), self._creso
            )

        elif is_array(other):
            if other.ndim == 0:
                # see also: item_from_zerodim
                item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
                return self.__truediv__(item)
            return self.to_timedelta64() / other

        return NotImplemented

    def __rtruediv__(self, other):
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            if self._creso != other._creso:
                self, other = self._maybe_cast_to_matching_resos(other)
            return float(other._value) / self._value

        elif is_array(other):
            if other.ndim == 0:
                # see also: item_from_zerodim
                item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
                return self.__rtruediv__(item)
            elif other.dtype.kind == "O":
                # GH#31869
                return np.array([x / self for x in other])

            # TODO: if other.dtype.kind == "m" and other.dtype != self.asm8.dtype
            #  then should disallow for consistency with scalar behavior; requires
            #  deprecation cycle. (or changing scalar behavior)
            return other / self.to_timedelta64()

        return NotImplemented

    def __floordiv__(self, other):
        # numpy does not implement floordiv for timedelta64 dtype, so we cannot
        # just defer
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            if self._creso != other._creso:
                self, other = self._maybe_cast_to_matching_resos(other)
            return self._value // other._value

        elif is_integer_object(other) or is_float_object(other):
            if util.is_nan(other):
                return NaT
            return type(self)._from_value_and_reso(self._value // other, self._creso)

        elif is_array(other):
            if other.ndim == 0:
                # see also: item_from_zerodim
                item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
                return self.__floordiv__(item)

            if other.dtype.kind == "m":
                # also timedelta-like
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore",
                        "invalid value encountered in floor_divide",
                        RuntimeWarning
                    )
                    result = self.asm8 // other
                mask = other.view("i8") == NPY_NAT
                if mask.any():
                    # We differ from numpy here
                    result = result.astype("f8")
                    result[mask] = np.nan
                return result

            elif other.dtype.kind in ["i", "u", "f"]:
                # 0-dim arrays were unpacked above, so `other` has ndim >= 1
                return self.to_timedelta64() // other

            raise TypeError(f"Invalid dtype {other.dtype} for __floordiv__")

        return NotImplemented

    def __rfloordiv__(self, other):
        # numpy does not implement floordiv for timedelta64 dtype, so we cannot
        # just defer
        if _should_cast_to_timedelta(other):
            # We interpret NaT as timedelta64("NaT")
            other = Timedelta(other)
            if other is NaT:
                return np.nan
            if self._creso != other._creso:
                self, other = self._maybe_cast_to_matching_resos(other)
            return other._value // self._value

        elif is_array(other):
            if other.ndim == 0:
                # see also: item_from_zerodim
                item = cnp.PyArray_ToScalar(cnp.PyArray_DATA(other), other)
                return self.__rfloordiv__(item)

            if other.dtype.kind == "m":
                # also timedelta-like
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore",
                        "invalid value encountered in floor_divide",
                        RuntimeWarning
                    )
                    result = other // self.asm8
                mask = other.view("i8") == NPY_NAT
                if mask.any():
                    # We differ from numpy here
                    result = result.astype("f8")
                    result[mask] = np.nan
                return result

            # Includes integer array // Timedelta, disallowed in GH#19761
            raise TypeError(f"Invalid dtype {other.dtype} for __floordiv__")

        return NotImplemented

    def __mod__(self, other):
        # Naive implementation, room for optimization
        return self.__divmod__(other)[1]

    def __rmod__(self, other):
        # Naive implementation, room for optimization
        return self.__rdivmod__(other)[1]

    def __divmod__(self, other):
        # Naive implementation, room for optimization
        div = self // other
        return div, self - div * other

    def __rdivmod__(self, other):
        # Naive implementation, room for optimization
        div = other // self
        return div, other - div * self
def truediv_object_array(ndarray left, ndarray right):
    """
    Element-wise ``left[i] / right[i]`` collected into an object array.

    Each element of ``left`` is passed to ``get_timedelta64_value``, so it is
    expected to be a timedelta64 scalar.  The result has the shape of ``left``.
    """
    cdef:
        ndarray[object] result = np.empty((<object>left).shape, dtype=object)
        object td64  # really timedelta64 if we find a way to declare that
        object obj, res_value
        _Timedelta td
        Py_ssize_t i

    for i in range(len(left)):
        td64 = left[i]
        obj = right[i]

        if get_timedelta64_value(td64) != NPY_NAT:
            # regular value: box it and use Timedelta division
            td = Timedelta(td64)
            res_value = td / obj
        elif _should_cast_to_timedelta(obj):
            # td64 here should be interpreted as a td64 NaT
            res_value = np.nan
        else:
            # if its a number then let numpy handle division, otherwise
            #  numpy will raise
            res_value = td64 / obj

        result[i] = res_value

    return result
def floordiv_object_array(ndarray left, ndarray right):
    """
    Element-wise ``left[i] // right[i]`` collected into an object array.

    Each element of ``left`` is passed to ``get_timedelta64_value``, so it is
    expected to be a timedelta64 scalar.  The result has the shape of ``left``.
    """
    cdef:
        ndarray[object] result = np.empty((<object>left).shape, dtype=object)
        object td64  # really timedelta64 if we find a way to declare that
        object obj, res_value
        _Timedelta td
        Py_ssize_t i

    for i in range(len(left)):
        td64 = left[i]
        obj = right[i]

        if get_timedelta64_value(td64) != NPY_NAT:
            # regular value: box it and use Timedelta floor division
            td = Timedelta(td64)
            res_value = td // obj
        elif _should_cast_to_timedelta(obj):
            # td64 here should be interpreted as a td64 NaT
            res_value = np.nan
        else:
            # if its a number then let numpy handle division, otherwise
            #  numpy will raise
            res_value = td64 // obj

        result[i] = res_value

    return result
cdef bint is_any_td_scalar(object obj):
    """
    Check whether ``obj`` is a timedelta-like scalar.

    Cython counterpart of
    `isinstance(obj, (timedelta, np.timedelta64, Tick))`.

    Parameters
    ----------
    obj : object

    Returns
    -------
    bool
    """
    if PyDelta_Check(obj):
        return True
    if is_timedelta64_object(obj):
        return True
    return is_tick_object(obj)
cdef bint _should_cast_to_timedelta(object obj):
    """
    Decide whether a binary-op operand should be treated as a Timedelta.

    True for timedelta-like scalars, None, NaT and strings.
    """
    if is_any_td_scalar(obj):
        return True
    if obj is None or obj is NaT:
        return True
    return isinstance(obj, str)
|