from decimal import Decimal
import numbers
from sys import maxsize

cimport cython
from cpython.datetime cimport (
    date,
    time,
    timedelta,
)
from cython cimport Py_ssize_t

import numpy as np

cimport numpy as cnp
from numpy cimport (
    flatiter,
    float64_t,
    int64_t,
    ndarray,
    uint8_t,
)

cnp.import_array()

from pandas._libs cimport util
from pandas._libs.tslibs.nattype cimport (
    c_NaT as NaT,
    checknull_with_nat,
    is_dt64nat,
    is_td64nat,
)
from pandas._libs.tslibs.np_datetime cimport (
    get_datetime64_unit,
    get_datetime64_value,
    get_timedelta64_value,
)

from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

cdef:
    float64_t INF = <float64_t>np.inf
    float64_t NEGINF = -INF

    int64_t NPY_NAT = util.get_nat()

    bint is_32bit = maxsize <= 2 ** 32

    type cDecimal = Decimal  # for faster isinstance checks


cpdef bint check_na_tuples_nonequal(object left, object right):
    """
    When we have NA in one of the tuples but not the other we have to check here,
    because our regular checks fail before with ambiguous boolean value.

    Parameters
    ----------
    left: Any
    right: Any

    Returns
    -------
    True if we are dealing with tuples that have NA on one side and non NA on
    the other side. False if either argument is not a tuple or if the tuples
    differ in length.
    """
    if not isinstance(left, tuple) or not isinstance(right, tuple):
        return False

    if len(left) != len(right):
        return False

    for left_element, right_element in zip(left, right):
        if left_element is C_NA and right_element is not C_NA:
            return True
        elif right_element is C_NA and left_element is not C_NA:
            return True

    return False


cpdef bint is_matching_na(object left, object right, bint nan_matches_none=False):
    """
    Check if two scalars are both NA of matching types.

    Parameters
    ----------
    left : Any
    right : Any
    nan_matches_none : bool, default False
        For backwards compatibility, consider NaN as matching None.

    Returns
    -------
    bool

    Notes
    -----
    datetime64 and timedelta64 NaT values only match when both sides are of
    the same kind and carry the same unit.
    """
    if left is None:
        if nan_matches_none and util.is_nan(right):
            return True
        return right is None
    elif left is C_NA:
        return right is C_NA
    elif left is NaT:
        return right is NaT
    elif util.is_float_object(left):
        if nan_matches_none and right is None and util.is_nan(left):
            return True
        return (
            util.is_nan(left)
            and util.is_float_object(right)
            and util.is_nan(right)
        )
    elif util.is_complex_object(left):
        return (
            util.is_nan(left)
            and util.is_complex_object(right)
            and util.is_nan(right)
        )
    elif util.is_datetime64_object(left):
        return (
            get_datetime64_value(left) == NPY_NAT
            and util.is_datetime64_object(right)
            and get_datetime64_value(right) == NPY_NAT
            and get_datetime64_unit(left) == get_datetime64_unit(right)
        )
    elif util.is_timedelta64_object(left):
        return (
            get_timedelta64_value(left) == NPY_NAT
            and util.is_timedelta64_object(right)
            and get_timedelta64_value(right) == NPY_NAT
            and get_datetime64_unit(left) == get_datetime64_unit(right)
        )
    elif is_decimal_na(left):
        return is_decimal_na(right)
    return False


cpdef bint checknull(object val, bint inf_as_na=False):
    """
    Return boolean describing if the input is NA-like, defined here as any
    of:
     - None
     - nan
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    val : object
    inf_as_na : bool, default False
        Whether to treat INF and -INF as NA values.

    Returns
    -------
    bool
    """
    if val is None or val is NaT or val is C_NA:
        return True
    elif util.is_float_object(val) or util.is_complex_object(val):
        # NaN is the only float/complex value that compares unequal to itself
        if val != val:
            return True
        elif inf_as_na:
            return val == INF or val == NEGINF
        return False
    elif util.is_timedelta64_object(val):
        return get_timedelta64_value(val) == NPY_NAT
    elif util.is_datetime64_object(val):
        return get_datetime64_value(val) == NPY_NAT
    else:
        return is_decimal_na(val)


cdef bint is_decimal_na(object val):
    """
    Is this a decimal.Decimal object Decimal("NAN").
    """
    return isinstance(val, cDecimal) and val != val


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef ndarray[uint8_t] isnaobj(ndarray arr, bint inf_as_na=False):
    """
    Return boolean mask denoting which elements of an array are na-like,
    according to the criteria defined in `checknull`:
     - None
     - nan
     - NaT
     - np.datetime64 representation of NaT
     - np.timedelta64 representation of NaT
     - NA
     - Decimal("NaN")

    Parameters
    ----------
    arr : ndarray
    inf_as_na : bool, default False
        Whether to treat INF and -INF as NA values; forwarded to `checknull`.

    Returns
    -------
    result : ndarray (dtype=np.bool_)
        Mask with the same shape as ``arr``.
    """
    cdef:
        Py_ssize_t i, n = arr.size
        object val
        bint is_null
        ndarray result = np.empty((<object>arr).shape, dtype=np.uint8)
        flatiter it = cnp.PyArray_IterNew(arr)
        flatiter it2 = cnp.PyArray_IterNew(result)

    for i in range(n):
        # The PyArray_GETITEM and PyArray_ITER_NEXT are faster
        #  equivalents to `val = arr[i]`
        val = cnp.PyArray_GETITEM(arr, cnp.PyArray_ITER_DATA(it))
        cnp.PyArray_ITER_NEXT(it)
        is_null = checknull(val, inf_as_na=inf_as_na)
        # Dereference pointer (set value)
        (<uint8_t *>(cnp.PyArray_ITER_DATA(it2)))[0] = <uint8_t>is_null
        cnp.PyArray_ITER_NEXT(it2)
    # uint8 and bool_ share an item size, so the view reinterprets in place
    return result.view(np.bool_)


def isposinf_scalar(val: object) -> bool:
    """
    Return True if ``val`` is a float object equal to positive infinity.
    """
    return util.is_float_object(val) and val == INF


def isneginf_scalar(val: object) -> bool:
    """
    Return True if ``val`` is a float object equal to negative infinity.
    """
    return util.is_float_object(val) and val == NEGINF


cdef bint is_null_datetime64(v):
    # determine if we have a null for a datetime (or integer versions),
    # excluding np.timedelta64('nat')
    return checknull_with_nat(v) or is_dt64nat(v)


cdef bint is_null_timedelta64(v):
    # determine if we have a null for a timedelta (or integer versions),
    # excluding np.datetime64('nat')
    return checknull_with_nat(v) or is_td64nat(v)


cdef bint checknull_with_nat_and_na(object obj):
    # See GH#32214
    return checknull_with_nat(obj) or obj is C_NA


@cython.wraparound(False)
@cython.boundscheck(False)
def is_float_nan(values: ndarray) -> ndarray:
    """
    True for elements which correspond to a float nan

    Returns
    -------
    ndarray[bool]
    """
    cdef:
        ndarray[uint8_t] result
        Py_ssize_t i, N
        object val

    N = len(values)
    result = np.zeros(N, dtype=np.uint8)

    for i in range(N):
        val = values[i]
        if util.is_nan(val):
            result[i] = True
    return result.view(bool)


@cython.wraparound(False)
@cython.boundscheck(False)
def is_numeric_na(values: ndarray) -> ndarray:
    """
    Check for NA values consistent with IntegerArray/FloatingArray.

    Similar to a vectorized is_valid_na_for_dtype restricted to numeric dtypes.

    Returns
    -------
    ndarray[bool]

    Raises
    ------
    TypeError
        If ``values`` contains an NA-like element that is not None, NA,
        a NaN, or Decimal("NaN").
    """
    cdef:
        ndarray[uint8_t] result
        Py_ssize_t i, N
        object val

    N = len(values)
    result = np.zeros(N, dtype=np.uint8)

    for i in range(N):
        val = values[i]
        if checknull(val):
            if val is None or val is C_NA or util.is_nan(val) or is_decimal_na(val):
                result[i] = True
            else:
                raise TypeError(f"'values' contains non-numeric NA {val}")
    return result.view(bool)


# -----------------------------------------------------------------------------
# Implementation of NA singleton


def _create_binary_propagating_op(name, is_divmod=False):
    """
    Build a binary dunder method named ``name`` that propagates NA.

    The generated method returns NA for NA, str, bytes, numbers, np.bool_
    and zero-dimensional arrays; an object array filled with NA for other
    arrays; NA for date/time/timedelta operands when ``name`` is a
    comparison; and NotImplemented otherwise. With ``is_divmod=True`` each
    result is returned as a pair.
    """
    is_cmp = name.strip("_") in ["eq", "ne", "le", "lt", "ge", "gt"]

    def method(self, other):
        if (other is C_NA or isinstance(other, (str, bytes))
                or isinstance(other, (numbers.Number, np.bool_))
                or util.is_array(other) and not other.shape):
            # Need the other.shape clause to handle NumPy scalars,
            # since we do a setitem on `out` below, which
            # won't work for NumPy scalars.
            if is_divmod:
                return NA, NA
            else:
                return NA

        elif util.is_array(other):
            out = np.empty(other.shape, dtype=object)
            out[:] = NA

            if is_divmod:
                return out, out.copy()
            else:
                return out

        elif is_cmp and isinstance(other, (date, time, timedelta)):
            return NA

        return NotImplemented

    method.__name__ = name
    return method


def _create_unary_propagating_op(name: str):
    """
    Build a unary dunder method named ``name`` that always returns NA.
    """
    def method(self):
        return NA

    method.__name__ = name
    return method


cdef class C_NAType:
    pass


class NAType(C_NAType):
    """
    NA ("not available") missing value indicator.

    .. warning::

       Experimental: the behaviour of NA can still change without warning.

    The NA singleton is a missing value indicator defined by pandas. It is
    used in certain new extension dtypes (currently the "string" dtype).
    """

    # The single shared instance, created lazily by __new__
    _instance = None

    def __new__(cls, *args, **kwargs):
        # Singleton: every construction returns the same object
        if NAType._instance is None:
            NAType._instance = C_NAType.__new__(cls, *args, **kwargs)
        return NAType._instance

    def __repr__(self) -> str:
        return "<NA>"

    def __format__(self, format_spec) -> str:
        # Fall back to the plain repr when the spec is not valid for a str
        try:
            return self.__repr__().__format__(format_spec)
        except ValueError:
            return self.__repr__()

    def __bool__(self):
        raise TypeError("boolean value of NA is ambiguous")

    def __hash__(self):
        # GH 30013: Ensure hash is large enough to avoid hash collisions with integers
        exponent = 31 if is_32bit else 61
        return 2 ** exponent - 1

    def __reduce__(self):
        # Returning a string makes pickle look up the module-level name "NA"
        return "NA"

    # Binary arithmetic and comparison ops -> propagate

    __add__ = _create_binary_propagating_op("__add__")
    __radd__ = _create_binary_propagating_op("__radd__")
    __sub__ = _create_binary_propagating_op("__sub__")
    __rsub__ = _create_binary_propagating_op("__rsub__")
    __mul__ = _create_binary_propagating_op("__mul__")
    __rmul__ = _create_binary_propagating_op("__rmul__")
    __matmul__ = _create_binary_propagating_op("__matmul__")
    __rmatmul__ = _create_binary_propagating_op("__rmatmul__")
    __truediv__ = _create_binary_propagating_op("__truediv__")
    __rtruediv__ = _create_binary_propagating_op("__rtruediv__")
    __floordiv__ = _create_binary_propagating_op("__floordiv__")
    __rfloordiv__ = _create_binary_propagating_op("__rfloordiv__")
    __mod__ = _create_binary_propagating_op("__mod__")
    __rmod__ = _create_binary_propagating_op("__rmod__")
    __divmod__ = _create_binary_propagating_op("__divmod__", is_divmod=True)
    __rdivmod__ = _create_binary_propagating_op("__rdivmod__", is_divmod=True)
    # __lshift__ and __rshift__ are not implemented

    __eq__ = _create_binary_propagating_op("__eq__")
    __ne__ = _create_binary_propagating_op("__ne__")
    __le__ = _create_binary_propagating_op("__le__")
    __lt__ = _create_binary_propagating_op("__lt__")
    __gt__ = _create_binary_propagating_op("__gt__")
    __ge__ = _create_binary_propagating_op("__ge__")

    # Unary ops

    __neg__ = _create_unary_propagating_op("__neg__")
    __pos__ = _create_unary_propagating_op("__pos__")
    __abs__ = _create_unary_propagating_op("__abs__")
    __invert__ = _create_unary_propagating_op("__invert__")

    # pow has special cases: NA ** 0 gives 1 and 1 ** NA gives 1
    def __pow__(self, other):
        if other is C_NA:
            return NA
        elif isinstance(other, (numbers.Number, np.bool_)):
            if other == 0:
                # returning positive is correct for +/- 0.
                return type(other)(1)
            else:
                return NA
        elif util.is_array(other):
            return np.where(other == 0, other.dtype.type(1), NA)

        return NotImplemented

    def __rpow__(self, other):
        if other is C_NA:
            return NA
        elif isinstance(other, (numbers.Number, np.bool_)):
            if other == 1:
                return other
            else:
                return NA
        elif util.is_array(other):
            return np.where(other == 1, other, NA)
        return NotImplemented

    # Logical ops using Kleene logic

    def __and__(self, other):
        if other is False:
            return False
        elif other is True or other is C_NA:
            return NA
        return NotImplemented

    __rand__ = __and__

    def __or__(self, other):
        if other is True:
            return True
        elif other is False or other is C_NA:
            return NA
        return NotImplemented

    __ror__ = __or__

    def __xor__(self, other):
        if other is False or other is True or other is C_NA:
            return NA
        return NotImplemented

    __rxor__ = __xor__

    __array_priority__ = 1000
    _HANDLED_TYPES = (np.ndarray, numbers.Number, str, np.bool_)

    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
        # Defer to other operands when any input is of a type NA does not handle
        types = self._HANDLED_TYPES + (NAType,)
        for x in inputs:
            if not isinstance(x, types):
                return NotImplemented

        if method != "__call__":
            raise ValueError(f"ufunc method '{method}' not supported for NA")
        result = maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is NotImplemented:
            # For a NumPy ufunc that's not a binop, like np.logaddexp
            index = [i for i, x in enumerate(inputs) if x is NA][0]
            # Broadcasting NA against the other inputs yields an NA-filled
            # result of the broadcast shape
            result = np.broadcast_arrays(*inputs)[index]
            if result.ndim == 0:
                result = result.item()
            if ufunc.nout > 1:
                result = (NA,) * ufunc.nout

        return result


C_NA = NAType()   # C-visible
NA = C_NA         # Python-visible