order.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. from __future__ import annotations
  2. from dataclasses import dataclass
  3. from typing import ClassVar, cast
  4. try:
  5. from typing import Literal
  6. except ImportError:
  7. from typing_extensions import Literal # type: ignore
  8. import numpy as np
  9. from pandas import DataFrame
  10. from seaborn._core.scales import Scale
  11. from seaborn._core.groupby import GroupBy
  12. from seaborn._stats.base import Stat
  13. from seaborn.utils import _version_predates
  14. # From https://github.com/numpy/numpy/blob/main/numpy/lib/function_base.pyi
  15. _MethodKind = Literal[
  16. "inverted_cdf",
  17. "averaged_inverted_cdf",
  18. "closest_observation",
  19. "interpolated_inverted_cdf",
  20. "hazen",
  21. "weibull",
  22. "linear",
  23. "median_unbiased",
  24. "normal_unbiased",
  25. "lower",
  26. "higher",
  27. "midpoint",
  28. "nearest",
  29. ]
  30. @dataclass
  31. class Perc(Stat):
  32. """
  33. Replace observations with percentile values.
  34. Parameters
  35. ----------
  36. k : list of numbers or int
  37. If a list of numbers, this gives the percentiles (in [0, 100]) to compute.
  38. If an integer, compute `k` evenly-spaced percentiles between 0 and 100.
  39. For example, `k=5` computes the 0, 25, 50, 75, and 100th percentiles.
  40. method : str
  41. Method for interpolating percentiles between observed datapoints.
  42. See :func:`numpy.percentile` for valid options and more information.
  43. Examples
  44. --------
  45. .. include:: ../docstrings/objects.Perc.rst
  46. """
  47. k: int | list[float] = 5
  48. method: str = "linear"
  49. group_by_orient: ClassVar[bool] = True
  50. def _percentile(self, data: DataFrame, var: str) -> DataFrame:
  51. k = list(np.linspace(0, 100, self.k)) if isinstance(self.k, int) else self.k
  52. method = cast(_MethodKind, self.method)
  53. values = data[var].dropna()
  54. if _version_predates(np, "1.22"):
  55. res = np.percentile(values, k, interpolation=method) # type: ignore
  56. else:
  57. res = np.percentile(data[var].dropna(), k, method=method)
  58. return DataFrame({var: res, "percentile": k})
  59. def __call__(
  60. self, data: DataFrame, groupby: GroupBy, orient: str, scales: dict[str, Scale],
  61. ) -> DataFrame:
  62. var = {"x": "y", "y": "x"}[orient]
  63. return groupby.apply(data, self._percentile, var)