123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111 |
- from __future__ import annotations
- from collections import abc
- from typing import TYPE_CHECKING
- from pandas.core.interchange.column import PandasColumn
- from pandas.core.interchange.dataframe_protocol import DataFrame as DataFrameXchg
- if TYPE_CHECKING:
- from pandas import (
- DataFrame,
- Index,
- )
class PandasDataFrameXchg(DataFrameXchg):
    """
    A data frame class, with only the methods required by the interchange
    protocol defined.

    Instances of this (private) class are returned from
    ``pd.DataFrame.__dataframe__`` as objects with the methods and
    attributes defined on this class.
    """

    def __init__(
        self, df: DataFrame, nan_as_null: bool = False, allow_copy: bool = True
    ) -> None:
        """
        Constructor - an instance of this (private) class is returned from
        `pd.DataFrame.__dataframe__`.

        Parameters
        ----------
        df : DataFrame
            The pandas object being wrapped; it is stored as-is.
        nan_as_null : bool, default False
            Stored on the instance and forwarded to derived wrappers; see
            the comment in the body.
        allow_copy : bool, default True
            Forwarded to every ``PandasColumn`` and derived wrapper created
            from this object.
        """
        self._df = df
        # ``nan_as_null`` is a keyword intended for the consumer to tell the
        # producer to overwrite null values in the data with ``NaN`` (or ``NaT``).
        # This currently has no effect; once support for nullable extension
        # dtypes is added, this value should be propagated to columns.
        self._nan_as_null = nan_as_null
        self._allow_copy = allow_copy

    def __dataframe__(
        self, nan_as_null: bool = False, allow_copy: bool = True
    ) -> PandasDataFrameXchg:
        """
        Return a new wrapper around the same underlying frame, built with the
        given ``nan_as_null`` and ``allow_copy`` rather than the stored ones.
        """
        return PandasDataFrameXchg(self._df, nan_as_null, allow_copy)

    @property
    def metadata(self) -> dict[str, Index]:
        """
        Pandas-specific metadata: the frame's index under ``"pandas.index"``.
        """
        # `index` isn't a regular column, and the protocol doesn't support row
        # labels - so we export it as Pandas-specific metadata here.
        return {"pandas.index": self._df.index}

    def num_columns(self) -> int:
        """
        Return the number of columns in the wrapped frame.
        """
        return len(self._df.columns)

    def num_rows(self) -> int:
        """
        Return the number of rows in the wrapped frame.
        """
        return len(self._df)

    def num_chunks(self) -> int:
        """
        Return the number of chunks; this wrapper always reports one.
        """
        return 1

    def column_names(self) -> Index:
        """
        Return the column labels of the wrapped frame.
        """
        return self._df.columns

    def get_column(self, i: int) -> PandasColumn:
        """
        Return the column at position ``i`` wrapped in a ``PandasColumn``.
        """
        return PandasColumn(self._df.iloc[:, i], allow_copy=self._allow_copy)

    def get_column_by_name(self, name: str) -> PandasColumn:
        """
        Return the column labelled ``name`` wrapped in a ``PandasColumn``.
        """
        return PandasColumn(self._df[name], allow_copy=self._allow_copy)

    def get_columns(self) -> list[PandasColumn]:
        """
        Return every column, in frame order, each wrapped in a
        ``PandasColumn``.
        """
        return [
            PandasColumn(self._df[name], allow_copy=self._allow_copy)
            for name in self._df.columns
        ]

    def select_columns(self, indices: abc.Sequence[int]) -> PandasDataFrameXchg:
        """
        Return a new wrapper restricted to the columns at the given positions.

        Raises
        ------
        ValueError
            If ``indices`` is not a ``collections.abc.Sequence``.
        """
        if not isinstance(indices, abc.Sequence):
            raise ValueError("`indices` is not a sequence")
        # Non-list sequences (e.g. tuples) are converted to a list before
        # being handed to ``iloc``.
        if not isinstance(indices, list):
            indices = list(indices)

        return PandasDataFrameXchg(
            self._df.iloc[:, indices], self._nan_as_null, self._allow_copy
        )

    def select_columns_by_name(self, names: abc.Sequence[str]) -> PandasDataFrameXchg:
        """
        Return a new wrapper restricted to the columns with the given labels.

        Raises
        ------
        ValueError
            If ``names`` is not a ``collections.abc.Sequence``.
        """
        if not isinstance(names, abc.Sequence):
            raise ValueError("`names` is not a sequence")
        # Non-list sequences (e.g. tuples) are converted to a list before
        # being handed to ``loc``.
        if not isinstance(names, list):
            names = list(names)

        return PandasDataFrameXchg(
            self._df.loc[:, names], self._nan_as_null, self._allow_copy
        )

    def get_chunks(
        self, n_chunks: int | None = None
    ) -> abc.Iterator[PandasDataFrameXchg]:
        """
        Return an iterator yielding the chunks.

        When ``n_chunks`` is falsy or not greater than one, the only chunk
        yielded is ``self``. Otherwise exactly ``n_chunks`` wrappers are
        yielded, each covering a consecutive row slice of
        ``ceil(num_rows / n_chunks)`` rows; trailing chunks may be shorter or
        empty when the rows do not divide evenly.
        """
        if n_chunks and n_chunks > 1:
            size = len(self._df)
            # Ceiling division: rows per chunk.
            step, remainder = divmod(size, n_chunks)
            if remainder != 0:
                step += 1
            # Iterate over the chunk ordinal rather than using ``step`` as the
            # range stride: for an empty frame ``step`` is 0, and a zero
            # stride makes ``range`` raise ValueError. The start offsets are
            # otherwise identical (0, step, 2 * step, ...).
            for chunk_number in range(n_chunks):
                start = chunk_number * step
                yield PandasDataFrameXchg(
                    self._df.iloc[start : start + step, :],
                    self._nan_as_null,
                    self._allow_copy,
                )
        else:
            yield self
|