123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224 |
- """
- Base class for the internal managers. Both BlockManager and ArrayManager
- inherit from this class.
- """
- from __future__ import annotations
- from typing import (
- Literal,
- TypeVar,
- final,
- )
- import numpy as np
- from pandas._typing import (
- ArrayLike,
- AxisInt,
- DtypeObj,
- Shape,
- )
- from pandas.errors import AbstractMethodError
- from pandas.core.dtypes.cast import (
- find_common_type,
- np_can_hold_element,
- )
- from pandas.core.base import PandasObject
- from pandas.core.indexes.api import (
- Index,
- default_index,
- )
- T = TypeVar("T", bound="DataManager")
class DataManager(PandasObject):
    """
    Common base class for the internal data managers.

    Several members here are placeholders that raise ``AbstractMethodError``
    (``items``, ``reindex_indexer``, ``_equal_values``, ``apply``); concrete
    managers are expected to override them. The remaining methods are built
    on top of those hooks and on the ``axes`` attribute.
    """

    # TODO share more methods/attributes

    # One Index per dimension of the manager.
    axes: list[Index]

    @property
    def items(self) -> Index:
        """Abstract hook; the base implementation always raises."""
        raise AbstractMethodError(self)

    @final
    def __len__(self) -> int:
        """Return the number of entries in ``items``."""
        return len(self.items)

    @property
    def ndim(self) -> int:
        """Return the number of axes."""
        return len(self.axes)

    @property
    def shape(self) -> Shape:
        """Return a tuple holding the length of every axis, in order."""
        return tuple(map(len, self.axes))

    @final
    def _validate_set_axis(self, axis: AxisInt, new_labels: Index) -> None:
        """
        Check that ``new_labels`` has a length compatible with axis ``axis``.

        Raises
        ------
        ValueError
            If the lengths differ, unless ``axis == 1`` and ``items`` is
            empty.
        """
        # Caller is responsible for ensuring we have an Index object.
        old_len = len(self.axes[axis])
        new_len = len(new_labels)

        # If we are setting the index on a DataFrame with no columns,
        # it is OK to change the length.
        no_columns = axis == 1 and len(self.items) == 0
        if no_columns or new_len == old_len:
            return

        raise ValueError(
            f"Length mismatch: Expected axis has {old_len} elements, new "
            f"values have {new_len} elements"
        )

    def reindex_indexer(
        self: T,
        new_axis,
        indexer,
        axis: AxisInt,
        fill_value=None,
        allow_dups: bool = False,
        copy: bool = True,
        only_slice: bool = False,
    ) -> T:
        """Abstract hook; the base implementation always raises."""
        raise AbstractMethodError(self)

    @final
    def reindex_axis(
        self: T,
        new_index: Index,
        axis: AxisInt,
        fill_value=None,
        only_slice: bool = False,
    ) -> T:
        """
        Conform data manager to new index.

        The current axis is reindexed against ``new_index`` and the resulting
        (index, indexer) pair is handed to ``reindex_indexer`` with
        ``copy=False``.
        """
        target, indexer = self.axes[axis].reindex(new_index)
        return self.reindex_indexer(
            target,
            indexer,
            axis=axis,
            fill_value=fill_value,
            copy=False,
            only_slice=only_slice,
        )

    def _equal_values(self: T, other: T) -> bool:
        """
        To be implemented by the subclasses. Only check the column values
        assuming shape and indexes have already been checked.
        """
        raise AbstractMethodError(self)

    @final
    def equals(self, other: object) -> bool:
        """
        Implementation for DataFrame.equals

        Returns False when ``other`` is not a DataManager, when the number of
        axes differs, or when any pair of corresponding axes is unequal.
        Otherwise the answer is delegated to ``_equal_values``.
        """
        if not isinstance(other, DataManager):
            return False

        mine, theirs = self.axes, other.axes
        if len(mine) != len(theirs):
            return False

        for left, right in zip(mine, theirs):
            if not left.equals(right):
                return False

        return self._equal_values(other)

    def apply(
        self: T,
        f,
        align_keys: list[str] | None = None,
        **kwargs,
    ) -> T:
        """Abstract hook; the base implementation always raises."""
        raise AbstractMethodError(self)

    @final
    def isna(self: T, func) -> T:
        """Forward to ``self.apply("apply", func=func)``."""
        return self.apply("apply", func=func)

    # --------------------------------------------------------------------
    # Consolidation: No-ops for all but BlockManager

    def is_consolidated(self) -> bool:
        """Always True in this base implementation."""
        return True

    def consolidate(self: T) -> T:
        """Return ``self`` unchanged."""
        return self

    def _consolidate_inplace(self) -> None:
        """Do nothing."""
        return
class SingleDataManager(DataManager):
    """
    DataManager variant with a fixed ``ndim`` of 1.

    The backing array is read from ``self.arrays[0]``; subclasses are
    expected to provide ``arrays`` and to implement ``from_array``.
    """

    @property
    def ndim(self) -> Literal[1]:
        """Always 1."""
        return 1

    @final
    @property
    def array(self) -> ArrayLike:
        """
        Quick access to the backing array of the Block or SingleArrayManager.
        """
        # error: "SingleDataManager" has no attribute "arrays"; maybe "array"
        return self.arrays[0]  # type: ignore[attr-defined]

    def setitem_inplace(self, indexer, value) -> None:
        """
        Set values with indexer.

        For Single[Block/Array]Manager, this backs s[indexer] = value

        This is an inplace version of `setitem()`, mutating the manager/values
        in place, not returning a new Manager (and Block), and thus never changing
        the dtype.
        """
        target = self.array

        # EAs will do this validation in their own __setitem__ methods.
        # Note: checking for ndarray instead of np.dtype means we exclude
        # dt64/td64, which do their own validation.
        if isinstance(target, np.ndarray):
            value = np_can_hold_element(target.dtype, value)

        is_length_one_vector = (
            isinstance(value, np.ndarray) and value.ndim == 1 and len(value) == 1
        )
        if is_length_one_vector:
            # NumPy 1.25 deprecation: https://github.com/numpy/numpy/pull/10615
            value = value[0, ...]

        target[indexer] = value

    def grouped_reduce(self, func):
        """
        Apply ``func`` to the backing array and wrap the result in a new
        manager of the same type, labelled by a default index of matching
        length.
        """
        reduced = func(self.array)
        new_index = default_index(len(reduced))
        return type(self).from_array(reduced, new_index)

    @classmethod
    def from_array(cls, arr: ArrayLike, index: Index):
        """Abstract constructor; the base implementation always raises."""
        raise AbstractMethodError(cls)
def interleaved_dtype(dtypes: list[DtypeObj]) -> DtypeObj | None:
    """
    Find the common dtype for the given dtypes.

    Parameters
    ----------
    dtypes : List[DtypeObj]

    Returns
    -------
    dtype : np.dtype, ExtensionDtype, or None
        None is returned when `dtypes` is empty.
    """
    # An empty input has no common type; everything else is delegated.
    return find_common_type(dtypes) if len(dtypes) else None
|