123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778 |
- from __future__ import annotations
- from dataclasses import dataclass
- from typing import ClassVar, cast
- try:
- from typing import Literal
- except ImportError:
- from typing_extensions import Literal # type: ignore
- import numpy as np
- from pandas import DataFrame
- from seaborn._core.scales import Scale
- from seaborn._core.groupby import GroupBy
- from seaborn._stats.base import Stat
- from seaborn.utils import _version_predates
- # From https://github.com/numpy/numpy/blob/main/numpy/lib/function_base.pyi
- _MethodKind = Literal[
- "inverted_cdf",
- "averaged_inverted_cdf",
- "closest_observation",
- "interpolated_inverted_cdf",
- "hazen",
- "weibull",
- "linear",
- "median_unbiased",
- "normal_unbiased",
- "lower",
- "higher",
- "midpoint",
- "nearest",
- ]
- @dataclass
- class Perc(Stat):
- """
- Replace observations with percentile values.
- Parameters
- ----------
- k : list of numbers or int
- If a list of numbers, this gives the percentiles (in [0, 100]) to compute.
- If an integer, compute `k` evenly-spaced percentiles between 0 and 100.
- For example, `k=5` computes the 0, 25, 50, 75, and 100th percentiles.
- method : str
- Method for interpolating percentiles between observed datapoints.
- See :func:`numpy.percentile` for valid options and more information.
- Examples
- --------
- .. include:: ../docstrings/objects.Perc.rst
- """
- k: int | list[float] = 5
- method: str = "linear"
- group_by_orient: ClassVar[bool] = True
- def _percentile(self, data: DataFrame, var: str) -> DataFrame:
- k = list(np.linspace(0, 100, self.k)) if isinstance(self.k, int) else self.k
- method = cast(_MethodKind, self.method)
- values = data[var].dropna()
- if _version_predates(np, "1.22"):
- res = np.percentile(values, k, interpolation=method) # type: ignore
- else:
- res = np.percentile(data[var].dropna(), k, method=method)
- return DataFrame({var: res, "percentile": k})
- def __call__(
- self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
- ) -> DataFrame:
- var = {"x": "y", "y": "x"}[orient]
- return groupby.apply(data, self._percentile, var)
|