# base.py 5.6 KB

  """
  Base class for the internal managers. Both BlockManager and ArrayManager
  inherit from this class.
  """
  from __future__ import annotations

  from typing import (
      Literal,
      TypeVar,
      final,
  )

  import numpy as np

  from pandas._typing import (
      ArrayLike,
      AxisInt,
      DtypeObj,
      Shape,
  )
  from pandas.errors import AbstractMethodError

  from pandas.core.dtypes.cast import (
      find_common_type,
      np_can_hold_element,
  )

  from pandas.core.base import PandasObject
  from pandas.core.indexes.api import (
      Index,
      default_index,
  )
  # Type variable bound to DataManager; used to annotate methods whose return
  # type is the same manager type as ``self``.
  T = TypeVar("T", bound="DataManager")
  class DataManager(PandasObject):
      """
      Base class for the internal managers.

      Both BlockManager and ArrayManager inherit from this class. Concrete
      subclasses must provide ``items``, ``reindex_indexer``,
      ``_equal_values`` and ``apply``; the base implementations raise
      ``AbstractMethodError``.
      """

      # TODO share more methods/attributes

      # One Index per axis of the manager.
      axes: list[Index]

      @property
      def items(self) -> Index:
          """
          Index whose length defines ``len(self)``.

          Raises
          ------
          AbstractMethodError
              Always; must be defined in the concrete class.
          """
          # methodtype="property" makes the error message describe this as a
          # property rather than a method.
          raise AbstractMethodError(self, methodtype="property")

      @final
      def __len__(self) -> int:
          """Return the number of items."""
          return len(self.items)

      @property
      def ndim(self) -> int:
          """Number of axes."""
          return len(self.axes)

      @property
      def shape(self) -> Shape:
          """Tuple with the length of each axis, in axis order."""
          return tuple(len(ax) for ax in self.axes)

      @final
      def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None:
          """
          Check that ``new_labels`` has the same length as the current axis.

          Parameters
          ----------
          axis : int
              Position of the axis in ``self.axes``.
          new_labels : Index
              Proposed replacement labels.

          Raises
          ------
          ValueError
              If the lengths differ, except when ``axis == 1`` and there are
              no items.
          """
          # Caller is responsible for ensuring we have an Index object.
          old_len = len(self.axes[axis])
          new_len = len(new_labels)

          if axis == 1 and len(self.items) == 0:
              # If we are setting the index on a DataFrame with no columns,
              #  it is OK to change the length.
              pass

          elif new_len != old_len:
              raise ValueError(
                  f"Length mismatch: Expected axis has {old_len} elements, new "
                  f"values have {new_len} elements"
              )

      def reindex_indexer(
          self: T,
          new_axis,
          indexer,
          axis: AxisInt,
          fill_value=None,
          allow_dups: bool = False,
          copy: bool = True,
          only_slice: bool = False,
      ) -> T:
          """
          Reindex along ``axis`` using a precomputed ``indexer``.

          To be implemented by the subclasses.
          """
          raise AbstractMethodError(self)

      @final
      def reindex_axis(
          self: T,
          new_index: Index,
          axis: AxisInt,
          fill_value=None,
          only_slice: bool = False,
      ) -> T:
          """
          Conform data manager to new index.

          The indexer is computed with ``self.axes[axis].reindex(new_index)``
          and the work is delegated to ``reindex_indexer`` with ``copy=False``.
          """
          new_index, indexer = self.axes[axis].reindex(new_index)

          return self.reindex_indexer(
              new_index,
              indexer,
              axis=axis,
              fill_value=fill_value,
              copy=False,
              only_slice=only_slice,
          )

      def _equal_values(self: T, other: T) -> bool:
          """
          To be implemented by the subclasses. Only check the column values
          assuming shape and indexes have already been checked.
          """
          raise AbstractMethodError(self)

      @final
      def equals(self, other: object) -> bool:
          """
          Implementation for DataFrame.equals

          Returns False when ``other`` is not a DataManager, when the number
          of axes differs, or when any pair of axes is not equal; otherwise
          defers to ``_equal_values``.
          """
          if not isinstance(other, DataManager):
              return False

          self_axes, other_axes = self.axes, other.axes
          if len(self_axes) != len(other_axes):
              return False
          if not all(ax1.equals(ax2) for ax1, ax2 in zip(self_axes, other_axes)):
              return False

          return self._equal_values(other)

      def apply(
          self: T,
          f,
          align_keys: list[str] | None = None,
          **kwargs,
      ) -> T:
          """
          Apply ``f`` across the manager's data.

          To be implemented by the subclasses.
          """
          raise AbstractMethodError(self)

      @final
      def isna(self: T, func) -> T:
          """Dispatch ``func`` through ``self.apply("apply", func=func)``."""
          return self.apply("apply", func=func)

      # --------------------------------------------------------------------
      # Consolidation: No-ops for all but BlockManager

      def is_consolidated(self) -> bool:
          """Always True in the base class."""
          return True

      def consolidate(self: T) -> T:
          """Return ``self`` unchanged."""
          return self

      def _consolidate_inplace(self) -> None:
          """Do nothing."""
          return
  class SingleDataManager(DataManager):
      """
      Base class for the one-dimensional managers (``ndim`` is always 1).

      Concrete subclasses must implement ``from_array``.
      """

      @property
      def ndim(self) -> Literal[1]:
          """Number of axes; always 1."""
          return 1

      @final
      @property
      def array(self) -> ArrayLike:
          """
          Quick access to the backing array of the Block or SingleArrayManager.
          """
          # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
          return self.arrays[0]  # type: ignore[attr-defined]

      def setitem_inplace(self, indexer, value) -> None:
          """
          Set values with indexer.

          For Single[Block/Array]Manager, this backs s[indexer] = value

          This is an inplace version of `setitem()`, mutating the manager/values
          in place, not returning a new Manager (and Block), and thus never changing
          the dtype.
          """
          arr = self.array

          # EAs will do this validation in their own __setitem__ methods.
          if isinstance(arr, np.ndarray):
              # Note: checking for ndarray instead of np.dtype means we exclude
              #  dt64/td64, which do their own validation.
              value = np_can_hold_element(arr.dtype, value)

          if isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1:
              # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
              value = value[0, ...]

          arr[indexer] = value

      def grouped_reduce(self, func):
          """
          Apply ``func`` to the backing array and wrap the result in a new
          manager of the same type, labelled with a default index of the
          result's length.
          """
          arr = self.array
          res = func(arr)
          index = default_index(len(res))

          mgr = type(self).from_array(res, index)
          return mgr

      @classmethod
      def from_array(cls, arr: ArrayLike, index: Index):
          """
          Construct a manager from ``arr`` and ``index``.

          Raises
          ------
          AbstractMethodError
              Always; must be defined in the concrete class.
          """
          # methodtype="classmethod" makes the error message name ``cls``
          # itself instead of ``type(cls)``.
          raise AbstractMethodError(cls, methodtype="classmethod")
  def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
      """
      Find the common dtype for `dtypes`.

      Parameters
      ----------
      dtypes : List[DtypeObj]

      Returns
      -------
      dtype : np.dtype, ExtensionDtype, or None
          None is returned when `dtypes` is empty.
      """
      # len() rather than truthiness so any sized container of dtypes works.
      if not len(dtypes):
          return None

      return find_common_type(dtypes)