123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340 |
- """
- accessor.py contains base classes for implementing accessor properties
- that can be mixed into or pinned onto other pandas classes.
- """
- from __future__ import annotations
- from typing import (
- Callable,
- final,
- )
- import warnings
- from pandas.util._decorators import doc
- from pandas.util._exceptions import find_stack_level
class DirNamesMixin:
    """
    Mixin that tailors the output of ``dir()`` for accessor-bearing objects.

    The listing is the default ``__dir__`` result, minus every name in
    ``_accessors`` and ``_hidden_attrs``, plus those accessor names for which
    ``hasattr(self, name)`` succeeds on this particular object.
    """

    # Names of accessor attributes known to the class.
    _accessors: set[str] = set()
    # Names that are always dropped from the ``dir()`` listing.
    _hidden_attrs: frozenset[str] = frozenset()

    @final
    def _dir_deletions(self) -> set[str]:
        """
        Return the names to remove from the default ``__dir__`` listing.
        """
        removed = self._accessors | self._hidden_attrs
        return removed

    def _dir_additions(self) -> set[str]:
        """
        Return the accessor names that are actually reachable on this object.
        """
        reachable: set[str] = set()
        for attr_name in self._accessors:
            # hasattr is False when looking the accessor up raises
            # AttributeError, so such accessors stay out of the listing.
            if hasattr(self, attr_name):
                reachable.add(attr_name)
        return reachable

    def __dir__(self) -> list[str]:
        """
        Provide method name lookup and completion.

        Notes
        -----
        Only provide 'public' methods.
        """
        names = set(super().__dir__())
        # Deletions are applied first so that reachable accessors, which are
        # part of the deleted set, can be added back afterwards.
        names -= self._dir_deletions()
        names |= self._dir_additions()
        return sorted(names)
class PandasDelegate:
    """
    Abstract base class for delegating methods/properties.

    The three ``_delegate_*`` hooks below are what the generated accessors
    call; the default implementations raise ``TypeError``.
    """

    def _delegate_property_get(self, name, *args, **kwargs):
        raise TypeError(f"You cannot access the property {name}")

    def _delegate_property_set(self, name, value, *args, **kwargs):
        raise TypeError(f"The property {name} cannot be set")

    def _delegate_method(self, name, *args, **kwargs):
        raise TypeError(f"You cannot call method {name}")

    @classmethod
    def _add_delegate_accessors(
        cls,
        delegate,
        accessors: list[str],
        typ: str,
        overwrite: bool = False,
        accessor_mapping: Callable[[str], str] = lambda x: x,
        raise_on_missing: bool = True,
    ) -> None:
        """
        Attach delegating properties or methods to ``cls``.

        Parameters
        ----------
        cls
            Class that receives the generated methods/properties.
        delegate
            Class the doc-strings are read from.
        accessors : list of str
            Names to create on ``cls``.
        typ : {'property', 'method'}
            ``'property'`` creates properties; any other value creates
            methods.
        overwrite : bool, default False
            Replace an attribute of the same name already present on ``cls``.
        accessor_mapping : Callable, default lambda x: x
            Translates a name on ``cls`` to the attribute name on
            ``delegate``.
        raise_on_missing : bool, default True
            If False, names whose mapped attribute on ``delegate`` is missing
            (or None) are skipped. If True, no such check is made and the
            doc-string lookup raises for a missing attribute.
        """

        def _create_delegator_property(name):
            documentation = getattr(delegate, accessor_mapping(name)).__doc__

            def _getter(self):
                return self._delegate_property_get(name)

            def _setter(self, new_values):
                return self._delegate_property_set(name, new_values)

            _getter.__name__ = _setter.__name__ = name
            return property(fget=_getter, fset=_setter, doc=documentation)

        def _create_delegator_method(name):
            documentation = getattr(delegate, accessor_mapping(name)).__doc__

            def f(self, *args, **kwargs):
                return self._delegate_method(name, *args, **kwargs)

            f.__name__ = name
            f.__doc__ = documentation
            return f

        # ``typ`` does not change between names, so pick the factory once.
        make_delegator = (
            _create_delegator_property
            if typ == "property"
            else _create_delegator_method
        )

        for name in accessors:
            if not raise_on_missing:
                if getattr(delegate, accessor_mapping(name), None) is None:
                    continue

            delegator = make_delegator(name)

            # don't overwrite existing methods/properties
            if overwrite or not hasattr(cls, name):
                setattr(cls, name, delegator)
def delegate_names(
    delegate,
    accessors: list[str],
    typ: str,
    overwrite: bool = False,
    accessor_mapping: Callable[[str], str] = lambda x: x,
    raise_on_missing: bool = True,
):
    """
    Build a class decorator that adds delegated names to the decorated class.

    This is the decorator form of calling ``_add_delegate_accessors`` on the
    class directly after its definition; all arguments are forwarded to that
    classmethod unchanged.

    Parameters
    ----------
    delegate : object
        The class to get methods/properties & doc-strings.
    accessors : list of str
        Names of the accessors to add.
    typ : {'property', 'method'}
    overwrite : bool, default False
        Overwrite the method/property in the target class if it exists.
    accessor_mapping : Callable, default lambda x: x
        Callable to map the delegate's function to the cls' function.
    raise_on_missing : bool, default True
        Raise if an accessor does not exist on delegate.
        False skips the missing accessor.

    Returns
    -------
    callable
        A class decorator that returns the class it was given.

    Examples
    --------
    @delegate_names(Categorical, ["categories", "ordered"], "property")
    class CategoricalAccessor(PandasDelegate):
        [...]
    """
    # Collected once here; forwarded as keyword arguments on every use of
    # the decorator.
    forwarded_options = {
        "overwrite": overwrite,
        "accessor_mapping": accessor_mapping,
        "raise_on_missing": raise_on_missing,
    }

    def add_delegate_accessors(cls):
        cls._add_delegate_accessors(delegate, accessors, typ, **forwarded_options)
        return cls

    return add_delegate_accessors
- # Ported with modifications from xarray
- # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
- # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
- # 2. We use a UserWarning instead of a custom Warning
class CachedAccessor:
    """
    Descriptor that builds an accessor on first use and caches it.

    Accessing the attribute on the class returns the accessor class itself.
    Accessing it on an instance constructs ``accessor(instance)`` and stores
    the result on the instance under the same name.

    Parameters
    ----------
    name : str
        Namespace that will be accessed under, e.g. ``df.foo``.
    accessor : cls
        Class with the extension methods.

    Notes
    -----
    The accessor class is called with exactly one argument: the object the
    attribute was looked up on (expected to be a ``Series``, ``DataFrame``
    or ``Index``).
    """

    def __init__(self, name: str, accessor) -> None:
        self._name = name
        self._accessor = accessor

    def __get__(self, obj, cls):
        if obj is not None:
            built = self._accessor(obj)
            # Replace the property with the accessor object. Inspired by:
            # https://www.pydanny.com/cached-property.html
            # object.__setattr__ is required because NDFrame overrides
            # __setattr__. This class defines no __set__, so the value stored
            # on the instance is what later lookups return.
            object.__setattr__(obj, self._name, built)
            return built

        # we're accessing the attribute of the class, i.e., Dataset.geo
        return self._accessor
# NOTE: the docstring below is a format template processed by ``@doc``.
# ``{klass}`` is a placeholder and literal braces must stay doubled
# (``{{`` / ``}}``). The public ``register_*_accessor`` functions reuse this
# text through ``@doc(_register_accessor, klass=...)``.
@doc(klass="", others="")
def _register_accessor(name, cls):
    """
    Register a custom accessor on {klass} objects.

    Parameters
    ----------
    name : str
        Name under which the accessor should be registered. A warning is issued
        if this name conflicts with a preexisting attribute.

    Returns
    -------
    callable
        A class decorator.

    See Also
    --------
    register_dataframe_accessor : Register a custom accessor on DataFrame objects.
    register_series_accessor : Register a custom accessor on Series objects.
    register_index_accessor : Register a custom accessor on Index objects.

    Notes
    -----
    When accessed, your accessor will be initialized with the pandas object
    the user is interacting with. So the signature must be

    .. code-block:: python

        def __init__(self, pandas_object):  # noqa: E999
            ...

    For consistency with pandas methods, you should raise an ``AttributeError``
    if the data passed to your accessor has an incorrect dtype.

    >>> pd.Series(['a', 'b']).dt
    Traceback (most recent call last):
    ...
    AttributeError: Can only use .dt accessor with datetimelike values

    Examples
    --------
    In your library code::

        import pandas as pd

        @pd.api.extensions.register_dataframe_accessor("geo")
        class GeoAccessor:
            def __init__(self, pandas_obj):
                self._obj = pandas_obj

            @property
            def center(self):
                # return the geographic center point of this DataFrame
                lat = self._obj.latitude
                lon = self._obj.longitude
                return (float(lon.mean()), float(lat.mean()))

            def plot(self):
                # plot this array's data on a map, e.g., using Cartopy
                pass

    Back in an interactive IPython session:

        .. code-block:: ipython

            In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10),
               ...:                    "latitude": np.linspace(0, 20)}})
            In [2]: ds.geo.center
            Out[2]: (5.0, 10.0)
            In [3]: ds.geo.plot()  # plots data on a map
    """

    def decorator(accessor):
        # Warn, but still proceed, when the name shadows something that is
        # already reachable on ``cls``.
        if hasattr(cls, name):
            message = (
                f"registration of accessor {accessor!r} under name "
                f"{name!r} for type {cls!r} is overriding a preexisting "
                f"attribute with the same name."
            )
            warnings.warn(message, UserWarning, stacklevel=find_stack_level())

        descriptor = CachedAccessor(name, accessor)
        setattr(cls, name, descriptor)
        # Record the name in the class's accessor registry.
        cls._accessors.add(name)
        # Hand back the accessor class itself so the decorated name is
        # unchanged at the definition site.
        return accessor

    return decorator
@doc(_register_accessor, klass="DataFrame")
def register_dataframe_accessor(name):
    # Intentionally no docstring: it is supplied by the ``@doc`` decorator
    # from ``_register_accessor`` with ``klass="DataFrame"``.
    # The import happens at call time rather than at module import time
    # (presumably to avoid a circular import -- confirm before moving it).
    from pandas import DataFrame as target_cls

    class_decorator = _register_accessor(name, target_cls)
    return class_decorator
@doc(_register_accessor, klass="Series")
def register_series_accessor(name):
    # Intentionally no docstring: it is supplied by the ``@doc`` decorator
    # from ``_register_accessor`` with ``klass="Series"``.
    # The import happens at call time rather than at module import time
    # (presumably to avoid a circular import -- confirm before moving it).
    from pandas import Series as target_cls

    class_decorator = _register_accessor(name, target_cls)
    return class_decorator
@doc(_register_accessor, klass="Index")
def register_index_accessor(name):
    # Intentionally no docstring: it is supplied by the ``@doc`` decorator
    # from ``_register_accessor`` with ``klass="Index"``.
    # The import happens at call time rather than at module import time
    # (presumably to avoid a circular import -- confirm before moving it).
    from pandas import Index as target_cls

    class_decorator = _register_accessor(name, target_cls)
    return class_decorator
|