accessor.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340
  """
  accessor.py contains base classes for implementing accessor properties
  that can be mixed into or pinned onto other pandas classes.
  """
  5. from __future__ import annotations
  6. from typing import (
  7. Callable,
  8. final,
  9. )
  10. import warnings
  11. from pandas.util._decorators import doc
  12. from pandas.util._exceptions import find_stack_level
  class DirNamesMixin:
      """
      Mixin that tailors ``dir()`` output for objects exposing accessors.

      ``_accessors`` lists accessor names and ``_hidden_attrs`` lists names to
      hide. Both are removed from the default ``__dir__`` result; an accessor
      name is added back only when ``hasattr`` finds it on the instance.
      """

      _accessors: set[str] = set()
      _hidden_attrs: frozenset[str] = frozenset()

      @final
      def _dir_deletions(self) -> set[str]:
          """
          Return the names to strip from ``__dir__`` for this object.
          """
          accessor_names = self._accessors
          hidden_names = self._hidden_attrs
          return accessor_names | hidden_names

      def _dir_additions(self) -> set[str]:
          """
          Return the accessor names that resolve on this object.
          """
          available: set[str] = set()
          for accessor_name in self._accessors:
              # hasattr() performs a real attribute lookup on the instance.
              if hasattr(self, accessor_name):
                  available.add(accessor_name)
          return available

      def __dir__(self) -> list[str]:
          """
          Provide method name lookup and completion.

          Notes
          -----
          Only provide 'public' methods.
          """
          names = set(super().__dir__())
          # Remove first, then add back: a usable accessor is in both sets and
          # must survive in the final listing.
          names -= self._dir_deletions()
          names |= self._dir_additions()
          return sorted(names)
  class PandasDelegate:
      """
      Abstract base class for delegating methods/properties.

      The three ``_delegate_*`` hooks below raise ``TypeError``; the wrappers
      installed by ``_add_delegate_accessors`` forward to those hooks.
      """

      def _delegate_property_get(self, name, *args, **kwargs):
          # Default hook for reading a delegated property.
          raise TypeError(f"You cannot access the property {name}")

      def _delegate_property_set(self, name, value, *args, **kwargs):
          # Default hook for assigning a delegated property.
          raise TypeError(f"The property {name} cannot be set")

      def _delegate_method(self, name, *args, **kwargs):
          # Default hook for calling a delegated method.
          raise TypeError(f"You cannot call method {name}")

      @classmethod
      def _add_delegate_accessors(
          cls,
          delegate,
          accessors: list[str],
          typ: str,
          overwrite: bool = False,
          accessor_mapping: Callable[[str], str] = lambda x: x,
          raise_on_missing: bool = True,
      ) -> None:
          """
          Install forwarding properties/methods on ``cls``.

          Parameters
          ----------
          cls
              Class that receives the generated attributes.
          delegate
              Class whose attributes supply the doc-strings.
          accessors : list of str
              Names to install on ``cls``.
          typ : {'property', 'method'}
              ``'property'`` builds a property; any other value builds a method.
          overwrite : bool, default False
              Replace an attribute that already exists on ``cls``.
          accessor_mapping : Callable, default lambda x: x
              Maps a name on ``cls`` to the attribute name on ``delegate``.
          raise_on_missing : bool, default True
              When False, names whose mapped attribute on ``delegate`` is
              missing (or is None) are skipped. When True, the missing
              attribute lookup raises ``AttributeError``.
          """

          def _delegate_doc(name):
              # Doc-string comes from the mapped attribute on the delegate.
              return getattr(delegate, accessor_mapping(name)).__doc__

          def _build_property(name):
              def _read(self):
                  return self._delegate_property_get(name)

              def _write(self, new_values):
                  return self._delegate_property_set(name, new_values)

              _read.__name__ = name
              _write.__name__ = name
              return property(fget=_read, fset=_write, doc=_delegate_doc(name))

          def _build_method(name):
              def _call(self, *args, **kwargs):
                  return self._delegate_method(name, *args, **kwargs)

              _call.__name__ = name
              _call.__doc__ = _delegate_doc(name)
              return _call

          build = _build_property if typ == "property" else _build_method

          for name in accessors:
              if not raise_on_missing:
                  if getattr(delegate, accessor_mapping(name), None) is None:
                      continue

              # Built before the overwrite check, so a missing delegate
              # attribute raises even when nothing would be installed.
              delegator = build(name)

              # don't overwrite existing methods/properties
              if not overwrite and hasattr(cls, name):
                  continue
              setattr(cls, name, delegator)
  def delegate_names(
      delegate,
      accessors: list[str],
      typ: str,
      overwrite: bool = False,
      accessor_mapping: Callable[[str], str] = lambda x: x,
      raise_on_missing: bool = True,
  ):
      """
      Build a class decorator that installs delegated names on the class.

      This is the decorator form of calling ``_add_delegate_accessors``
      directly below a class definition.

      Parameters
      ----------
      delegate : object
          The class to get methods/properties & doc-strings.
      accessors : list of str
          Names of the accessors to add.
      typ : {'property', 'method'}
      overwrite : bool, default False
          Overwrite the method/property in the target class if it exists.
      accessor_mapping : Callable, default lambda x: x
          Callable to map the delegate's function to the cls' function.
      raise_on_missing : bool, default True
          Raise if an accessor does not exist on delegate.
          False skips the missing accessor.

      Returns
      -------
      callable
          A class decorator.

      Examples
      --------
      @delegate_names(Categorical, ["categories", "ordered"], "property")
      class CategoricalAccessor(PandasDelegate):
          [...]
      """
      # Captured once; each decorated class gets the same settings.
      options = {
          "overwrite": overwrite,
          "accessor_mapping": accessor_mapping,
          "raise_on_missing": raise_on_missing,
      }

      def _decorate(cls):
          cls._add_delegate_accessors(delegate, accessors, typ, **options)
          # Return the class itself so the decorator is transparent.
          return cls

      return _decorate
  # Ported with modifications from xarray
  # https://github.com/pydata/xarray/blob/master/xarray/core/extensions.py
  # 1. We don't need to catch and re-raise AttributeErrors as RuntimeErrors
  # 2. We use a UserWarning instead of a custom Warning
  class CachedAccessor:
      """
      Descriptor that builds an accessor on first use and caches it.

      Parameters
      ----------
      name : str
          Namespace that will be accessed under, e.g. ``df.foo``.
      accessor : cls
          Class with the extension methods.

      Notes
      -----
      The accessor class is called with the owning object as its only
      argument; this is expected to be one of ``Series``, ``DataFrame`` or
      ``Index``.
      """

      def __init__(self, name: str, accessor) -> None:
          self._name, self._accessor = name, accessor

      def __get__(self, obj, cls):
          accessor_type = self._accessor
          if obj is None:
              # Looked up on the class itself (e.g. ``DataFrame.geo``): hand
              # back the accessor class, not an instance.
              return accessor_type

          instance = accessor_type(obj)
          # Store the built accessor on the object under the same name so it
          # replaces this descriptor for later lookups. Inspired by:
          # https://www.pydanny.com/cached-property.html
          # object.__setattr__ is used because NDFrame overrides __setattr__.
          object.__setattr__(obj, self._name, instance)
          return instance
  @doc(klass="", others="")
  def _register_accessor(name, cls):
      """
      Register a custom accessor on {klass} objects.

      Parameters
      ----------
      name : str
          Name under which the accessor should be registered. A warning is issued
          if this name conflicts with a preexisting attribute.

      Returns
      -------
      callable
          A class decorator.

      See Also
      --------
      register_dataframe_accessor : Register a custom accessor on DataFrame objects.
      register_series_accessor : Register a custom accessor on Series objects.
      register_index_accessor : Register a custom accessor on Index objects.

      Notes
      -----
      When accessed, your accessor will be initialized with the pandas object
      the user is interacting with. So the signature must be

      .. code-block:: python

          def __init__(self, pandas_object): # noqa: E999
              ...

      For consistency with pandas methods, you should raise an ``AttributeError``
      if the data passed to your accessor has an incorrect dtype.

      >>> pd.Series(['a', 'b']).dt
      Traceback (most recent call last):
      ...
      AttributeError: Can only use .dt accessor with datetimelike values

      Examples
      --------
      In your library code::

          import pandas as pd

          @pd.api.extensions.register_dataframe_accessor("geo")
          class GeoAccessor:
              def __init__(self, pandas_obj):
                  self._obj = pandas_obj

              @property
              def center(self):
                  # return the geographic center point of this DataFrame
                  lat = self._obj.latitude
                  lon = self._obj.longitude
                  return (float(lon.mean()), float(lat.mean()))

              def plot(self):
                  # plot this array's data on a map, e.g., using Cartopy
                  pass

      Back in an interactive IPython session:

          .. code-block:: ipython

              In [1]: ds = pd.DataFrame({{"longitude": np.linspace(0, 10),
                 ...: "latitude": np.linspace(0, 20)}})
              In [2]: ds.geo.center
              Out[2]: (5.0, 10.0)
              In [3]: ds.geo.plot() # plots data on a map
      """
      # NOTE: the docstring above is a format template ("{klass}", doubled
      # braces) that the ``doc`` decorator fills in here and for the public
      # ``register_*_accessor`` wrappers, so literal braces must stay escaped.

      def decorator(accessor):
          name_taken = hasattr(cls, name)
          if name_taken:
              message = (
                  f"registration of accessor {accessor!r} under name "
                  f"{name!r} for type {cls!r} is overriding a preexisting "
                  "attribute with the same name."
              )
              warnings.warn(message, UserWarning, stacklevel=find_stack_level())

          # Install the caching descriptor and record the accessor name on the
          # class's ``_accessors`` set.
          setattr(cls, name, CachedAccessor(name, accessor))
          cls._accessors.add(name)
          # Return the accessor class unchanged so the decorator is transparent.
          return accessor

      return decorator
  @doc(_register_accessor, klass="DataFrame")
  def register_dataframe_accessor(name):
      # The docstring is supplied by the ``doc`` decorator from the
      # ``_register_accessor`` template with ``klass="DataFrame"``; a local
      # docstring would presumably be appended to it, so none is written here.
      # Imported at call time rather than at module import time, presumably
      # to avoid a circular import (confirm before hoisting).
      from pandas import DataFrame

      class_decorator = _register_accessor(name, DataFrame)
      return class_decorator
  @doc(_register_accessor, klass="Series")
  def register_series_accessor(name):
      # The docstring is supplied by the ``doc`` decorator from the
      # ``_register_accessor`` template with ``klass="Series"``; a local
      # docstring would presumably be appended to it, so none is written here.
      # Imported at call time rather than at module import time, presumably
      # to avoid a circular import (confirm before hoisting).
      from pandas import Series

      class_decorator = _register_accessor(name, Series)
      return class_decorator
  @doc(_register_accessor, klass="Index")
  def register_index_accessor(name):
      # The docstring is supplied by the ``doc`` decorator from the
      # ``_register_accessor`` template with ``klass="Index"``; a local
      # docstring would presumably be appended to it, so none is written here.
      # Imported at call time rather than at module import time, presumably
      # to avoid a circular import (confirm before hoisting).
      from pandas import Index

      class_decorator = _register_accessor(name, Index)
      return class_decorator