from __future__ import annotations

import numpy as np

from pandas.core.dtypes.base import register_extension_dtype
from pandas.core.dtypes.common import is_integer_dtype

from pandas.core.arrays.numeric import (
    NumericArray,
    NumericDtype,
)
class IntegerDtype(NumericDtype):
    """
    An ExtensionDtype to hold a single size & kind of integer dtype.

    These specific implementations are subclasses of the non-public
    IntegerDtype. For example, we have Int8Dtype to represent signed int 8s.

    The attributes name & type are set when these subclasses are created.
    """

    # Class-level hooks; how they are consumed is defined by NumericDtype,
    # which is not part of this module.
    _default_np_dtype = np.dtype(np.int64)
    _checker = is_integer_dtype

    @classmethod
    def construct_array_type(cls) -> type[IntegerArray]:
        """
        Return the array type associated with this dtype.

        Returns
        -------
        type
        """
        return IntegerArray

    @classmethod
    def _str_to_dtype_mapping(cls):
        """
        Return the module-level mapping from NumPy dtype names to dtype instances.

        Returns
        -------
        dict
            The ``INT_STR_TO_DTYPE`` mapping defined at the bottom of this
            module.
        """
        return INT_STR_TO_DTYPE

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless. e.g. if 'values'
        has a floating dtype, each value must be an integer.

        Parameters
        ----------
        values : np.ndarray
            Array to convert.
        dtype : np.dtype
            Target dtype.
        copy : bool
            Forwarded unchanged to ``ndarray.astype``.

        Returns
        -------
        np.ndarray
            ``values`` converted to ``dtype``.

        Raises
        ------
        TypeError
            If the converted values do not all compare equal to the originals.
        """
        try:
            return values.astype(dtype, casting="safe", copy=copy)
        except TypeError as err:
            # NumPy rejected the conversion under the "safe" rule (e.g.
            # float -> int). Retry with the default rule and accept the
            # result only if every element survived the round trip unchanged.
            casted = values.astype(dtype, copy=copy)
            if (casted == values).all():
                return casted

            raise TypeError(
                f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
            ) from err
class IntegerArray(NumericArray):
    """
    Array of integer (optional missing) values.

    Uses :attr:`pandas.NA` as the missing value.

    .. warning::

       IntegerArray is currently experimental, and its API or internal
       implementation may change without warning.

    We represent an IntegerArray with 2 numpy arrays:

    - data: contains a numpy integer array of the appropriate dtype
    - mask: a boolean array holding a mask on the data, True is missing

    To construct an IntegerArray from generic array-like input, use
    :func:`pandas.array` with one of the integer dtypes (see examples).

    See :ref:`integer_na` for more.

    Parameters
    ----------
    values : numpy.ndarray
        A 1-d integer-dtype array.
    mask : numpy.ndarray
        A 1-d boolean-dtype array indicating missing values.
    copy : bool, default False
        Whether to copy the `values` and `mask`.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Returns
    -------
    IntegerArray

    Examples
    --------
    Create an IntegerArray with :func:`pandas.array`.

    >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
    >>> int_array
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    String aliases for the dtypes are also available. They are capitalized.

    >>> pd.array([1, None, 3], dtype='Int32')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: Int32

    >>> pd.array([1, None, 3], dtype='UInt16')
    <IntegerArray>
    [1, <NA>, 3]
    Length: 3, dtype: UInt16
    """

    # Dtype family this array type is paired with.
    _dtype_cls = IntegerDtype

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value = 1
    # Fill values used for any/all
    # Incompatible types in assignment (expression has type "int", base class
    # "BaseMaskedArray" defined the type as "<typing special form>")
    _truthy_value = 1  # type: ignore[assignment]
    _falsey_value = 0  # type: ignore[assignment]
# Docstring template shared by the concrete dtype classes below; ``{dtype}``
# is filled in with the NumPy dtype name via ``str.format``.
_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.

Uses :attr:`pandas.NA` as its missing value, rather than :attr:`numpy.nan`.

Attributes
----------
None

Methods
-------
None
"""
# create the Dtype


@register_extension_dtype
class Int8Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.int8
    name = "Int8"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="int8")
@register_extension_dtype
class Int16Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.int16
    name = "Int16"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="int16")
@register_extension_dtype
class Int32Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.int32
    name = "Int32"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="int32")
@register_extension_dtype
class Int64Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.int64
    name = "Int64"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="int64")
@register_extension_dtype
class UInt8Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.uint8
    name = "UInt8"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="uint8")
@register_extension_dtype
class UInt16Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.uint16
    name = "UInt16"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="uint16")
@register_extension_dtype
class UInt32Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.uint32
    name = "UInt32"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="uint32")
@register_extension_dtype
class UInt64Dtype(IntegerDtype):
    # Scalar type and string alias for this dtype.
    type = np.uint64
    name = "UInt64"
    # The docstring is rendered from the shared module-level template.
    __doc__ = _dtype_docstring.format(dtype="uint64")
# Maps each NumPy integer dtype name to an instance of the matching nullable
# dtype; returned by ``IntegerDtype._str_to_dtype_mapping``.
INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
    "int8": Int8Dtype(),
    "int16": Int16Dtype(),
    "int32": Int32Dtype(),
    "int64": Int64Dtype(),
    "uint8": UInt8Dtype(),
    "uint16": UInt16Dtype(),
    "uint32": UInt32Dtype(),
    "uint64": UInt64Dtype(),
}