integer.py 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. from __future__ import annotations
  2. import numpy as np
  3. from pandas.core.dtypes.base import register_extension_dtype
  4. from pandas.core.dtypes.common import is_integer_dtype
  5. from pandas.core.arrays.numeric import (
  6. NumericArray,
  7. NumericDtype,
  8. )
  9. class IntegerDtype(NumericDtype):
  10. """
  11. An ExtensionDtype to hold a single size & kind of integer dtype.
  12. These specific implementations are subclasses of the non-public
  13. IntegerDtype. For example, we have Int8Dtype to represent signed int 8s.
  14. The attributes name & type are set when these subclasses are created.
  15. """
  16. _default_np_dtype = np.dtype(np.int64)
  17. _checker = is_integer_dtype
  18. @classmethod
  19. def construct_array_type(cls) -> type[IntegerArray]:
  20. """
  21. Return the array type associated with this dtype.
  22. Returns
  23. -------
  24. type
  25. """
  26. return IntegerArray
  27. @classmethod
  28. def _str_to_dtype_mapping(cls):
  29. return INT_STR_TO_DTYPE
  30. @classmethod
  31. def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
  32. """
  33. Safely cast the values to the given dtype.
  34. "safe" in this context means the casting is lossless. e.g. if 'values'
  35. has a floating dtype, each value must be an integer.
  36. """
  37. try:
  38. return values.astype(dtype, casting="safe", copy=copy)
  39. except TypeError as err:
  40. casted = values.astype(dtype, copy=copy)
  41. if (casted == values).all():
  42. return casted
  43. raise TypeError(
  44. f"cannot safely cast non-equivalent {values.dtype} to {np.dtype(dtype)}"
  45. ) from err
  46. class IntegerArray(NumericArray):
  47. """
  48. Array of integer (optional missing) values.
  49. Uses :attr:`pandas.NA` as the missing value.
  50. .. warning::
  51. IntegerArray is currently experimental, and its API or internal
  52. implementation may change without warning.
  53. We represent an IntegerArray with 2 numpy arrays:
  54. - data: contains a numpy integer array of the appropriate dtype
  55. - mask: a boolean array holding a mask on the data, True is missing
  56. To construct an IntegerArray from generic array-like input, use
  57. :func:`pandas.array` with one of the integer dtypes (see examples).
  58. See :ref:`integer_na` for more.
  59. Parameters
  60. ----------
  61. values : numpy.ndarray
  62. A 1-d integer-dtype array.
  63. mask : numpy.ndarray
  64. A 1-d boolean-dtype array indicating missing values.
  65. copy : bool, default False
  66. Whether to copy the `values` and `mask`.
  67. Attributes
  68. ----------
  69. None
  70. Methods
  71. -------
  72. None
  73. Returns
  74. -------
  75. IntegerArray
  76. Examples
  77. --------
  78. Create an IntegerArray with :func:`pandas.array`.
  79. >>> int_array = pd.array([1, None, 3], dtype=pd.Int32Dtype())
  80. >>> int_array
  81. <IntegerArray>
  82. [1, <NA>, 3]
  83. Length: 3, dtype: Int32
  84. String aliases for the dtypes are also available. They are capitalized.
  85. >>> pd.array([1, None, 3], dtype='Int32')
  86. <IntegerArray>
  87. [1, <NA>, 3]
  88. Length: 3, dtype: Int32
  89. >>> pd.array([1, None, 3], dtype='UInt16')
  90. <IntegerArray>
  91. [1, <NA>, 3]
  92. Length: 3, dtype: UInt16
  93. """
  94. _dtype_cls = IntegerDtype
  95. # The value used to fill '_data' to avoid upcasting
  96. _internal_fill_value = 1
  97. # Fill values used for any/all
  98. # Incompatible types in assignment (expression has type "int", base class
  99. # "BaseMaskedArray" defined the type as "<typing special form>")
  100. _truthy_value = 1 # type: ignore[assignment]
  101. _falsey_value = 0 # type: ignore[assignment]
  102. _dtype_docstring = """
  103. An ExtensionDtype for {dtype} integer data.
  104. Uses :attr:`pandas.NA` as its missing value, rather than :attr:`numpy.nan`.
  105. Attributes
  106. ----------
  107. None
  108. Methods
  109. -------
  110. None
  111. """
  112. # create the Dtype
  113. @register_extension_dtype
  114. class Int8Dtype(IntegerDtype):
  115. type = np.int8
  116. name = "Int8"
  117. __doc__ = _dtype_docstring.format(dtype="int8")
  118. @register_extension_dtype
  119. class Int16Dtype(IntegerDtype):
  120. type = np.int16
  121. name = "Int16"
  122. __doc__ = _dtype_docstring.format(dtype="int16")
  123. @register_extension_dtype
  124. class Int32Dtype(IntegerDtype):
  125. type = np.int32
  126. name = "Int32"
  127. __doc__ = _dtype_docstring.format(dtype="int32")
  128. @register_extension_dtype
  129. class Int64Dtype(IntegerDtype):
  130. type = np.int64
  131. name = "Int64"
  132. __doc__ = _dtype_docstring.format(dtype="int64")
  133. @register_extension_dtype
  134. class UInt8Dtype(IntegerDtype):
  135. type = np.uint8
  136. name = "UInt8"
  137. __doc__ = _dtype_docstring.format(dtype="uint8")
  138. @register_extension_dtype
  139. class UInt16Dtype(IntegerDtype):
  140. type = np.uint16
  141. name = "UInt16"
  142. __doc__ = _dtype_docstring.format(dtype="uint16")
  143. @register_extension_dtype
  144. class UInt32Dtype(IntegerDtype):
  145. type = np.uint32
  146. name = "UInt32"
  147. __doc__ = _dtype_docstring.format(dtype="uint32")
  148. @register_extension_dtype
  149. class UInt64Dtype(IntegerDtype):
  150. type = np.uint64
  151. name = "UInt64"
  152. __doc__ = _dtype_docstring.format(dtype="uint64")
  153. INT_STR_TO_DTYPE: dict[str, IntegerDtype] = {
  154. "int8": Int8Dtype(),
  155. "int16": Int16Dtype(),
  156. "int32": Int32Dtype(),
  157. "int64": Int64Dtype(),
  158. "uint8": UInt8Dtype(),
  159. "uint16": UInt16Dtype(),
  160. "uint32": UInt32Dtype(),
  161. "uint64": UInt64Dtype(),
  162. }