123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- from __future__ import annotations
- from dataclasses import dataclass, fields, field
- import textwrap
- from typing import Any, Callable, Union
- from collections.abc import Generator
- import numpy as np
- import pandas as pd
- import matplotlib as mpl
- from numpy import ndarray
- from pandas import DataFrame
- from matplotlib.artist import Artist
- from seaborn._core.scales import Scale
- from seaborn._core.properties import (
- PROPERTIES,
- Property,
- RGBATuple,
- DashPattern,
- DashPatternWithOffset,
- )
- from seaborn._core.exceptions import PlotSpecError
class Mappable:
    """Descriptor-like marker for a Mark property and the source of its default."""

    def __init__(
        self,
        val: Any = None,
        depend: str | None = None,
        rc: str | None = None,
        auto: bool = False,
        grouping: bool = True,
    ):
        """
        Declare a property that may be mapped from data or given directly.

        Parameters
        ----------
        val : Any
            Literal value to use as the default.
        depend : str
            Name of another feature whose value supplies the default.
            Must be a key of ``PROPERTIES``.
        rc : str
            Name of a matplotlib rcParam whose value supplies the default.
            Must be a key of ``mpl.rcParams``.
        auto : bool
            Flag that the default is determined from other parameters at
            compile time.
        grouping : bool
            If True, use the mapped variable to define groups.

        """
        # Validation is done with asserts, so a bad name raises AssertionError
        # (and the checks vanish when Python runs with -O).
        assert depend is None or depend in PROPERTIES
        assert rc is None or rc in mpl.rcParams

        self._val = val
        self._rc = rc
        self._depend = depend
        self._auto = auto
        self._grouping = grouping

    def __repr__(self):
        """Compact tag shown where this object appears in a Mark init signature."""
        # Precedence: explicit value, then dependency, then rcParam, then auto.
        if self._val is not None:
            return f"<{self._val!r}>"
        if self._depend is not None:
            return f"<depend:{self._depend}>"
        if self._rc is not None:
            return f"<rc:{self._rc}>"
        return "<auto>" if self._auto else "<undefined>"

    @property
    def depend(self) -> Any:
        """Name of the feature that the default value is sourced from, if any."""
        return self._depend

    @property
    def grouping(self) -> bool:
        """Whether a variable mapped to this property should define groups."""
        return self._grouping

    @property
    def default(self) -> Any:
        """Explicit default value if set, otherwise the configured rcParam value.

        Returns None when neither a value nor an rcParam name was given.
        """
        if self._val is not None:
            return self._val
        if self._rc is not None:
            return mpl.rcParams.get(self._rc)
        return None
# TODO where is the right place to put this kind of type aliasing?

# Annotation aliases for Mark fields. Each one is a Union of the concrete
# value type(s) listed and Mappable, so a field annotated with it accepts
# either a direct value or a Mappable placeholder.
MappableBool = Union[bool, Mappable]
MappableString = Union[str, Mappable]
MappableFloat = Union[float, Mappable]
# Colors may be given as a string or as a tuple.
MappableColor = Union[str, tuple, Mappable]
# Styles may be given as a string or as a dash pattern (with or without offset).
MappableStyle = Union[str, DashPattern, DashPatternWithOffset, Mappable]
@dataclass
class Mark:
    """Base class for objects that visually represent data."""

    artist_kws: dict = field(default_factory=dict)

    @property
    def _mappable_props(self):
        """Dict of current values for every field declared with a Mappable default."""
        current = {}
        for spec in fields(self):
            if isinstance(spec.default, Mappable):
                current[spec.name] = getattr(self, spec.name)
        return current

    @property
    def _grouping_props(self):
        """List of Mappable-declared field names whose declaration has grouping set."""
        # TODO does it make sense to have variation within a Mark's
        # properties about whether they are grouping?
        names = []
        for spec in fields(self):
            declared = spec.default
            if isinstance(declared, Mappable) and declared.grouping:
                names.append(spec.name)
        return names

    # TODO make this method private? Would extender every need to call directly?
    def _resolve(
        self,
        data: DataFrame | dict[str, Any],
        name: str,
        scales: dict[str, Scale] | None = None,
    ) -> Any:
        """Look up the directly-set, data-mapped, or default value of a feature.

        Resolution order: a value set directly on the mark, then a value found
        in `data` (passed through the matching scale when one is given), then
        the feature this one depends on, then the declared default.

        Parameters
        ----------
        data : DataFrame or dict with scalar values
            Container with data values for features that will be semantically mapped.
        name : string
            Identity of the feature / semantic.
        scales: dict
            Mapping from variable to corresponding scale object.

        Returns
        -------
        value or array of values
            With a dict `data` a single value is returned; with a DataFrame the
            result has one entry per row (a list for names ending in "style",
            an array otherwise).

        Raises
        ------
        PlotSpecError
            When calling the scale on the data values raises any exception.

        """
        feature = self._mappable_props[name]
        prop = PROPERTIES.get(name, Property(name))

        return_multiple = isinstance(data, pd.DataFrame)
        return_array = return_multiple and not name.endswith("style")

        def broadcast(value):
            # Repeat a scalar to match the rows of `data` when it is a DataFrame.
            if return_multiple:
                value = [value] * len(data)
            if return_array:
                value = np.array(value)
            return value

        directly_specified = not isinstance(feature, Mappable)

        # Special case width because it needs to be resolved and added to the
        # dataframe during layer prep (so the Move operations use it properly).
        # TODO how does width *scaling* work, e.g. for violin width by count?
        if name == "width" and directly_specified and name in data:
            directly_specified = False

        if directly_specified:
            return broadcast(prop.standardize(feature))

        if name in data:
            # TODO Might this obviate the identity scale? Just don't add a scale?
            mapped = data[name]
            if scales is not None and name in scales:
                scale = scales[name]
                try:
                    mapped = scale(mapped)
                except Exception as err:
                    raise PlotSpecError._during("Scaling operation", name) from err
            if return_array:
                mapped = np.asarray(mapped)
            return mapped

        if feature.depend is not None:
            # TODO add source_func or similar to transform the source value?
            # e.g. set linewidth as a proportion of pointsize?
            return self._resolve(data, feature.depend, scales)

        return broadcast(prop.standardize(feature.default))

    def _infer_orient(self, scales: dict) -> str:  # TODO type scales
        """Return "y" when the y scale's priority exceeds the x scale's, else "x".

        A coordinate with no entry in `scales` is treated as priority 0.
        """
        # TODO The original version of this (in seaborn._base) did more checking.
        # Paring that down here for the prototype to see what restrictions make sense.

        # TODO rethink this to map from scale type to "DV priority" and use that?
        # e.g. Nominal > Discrete > Continuous

        x_priority = scales["x"]._priority if "x" in scales else 0
        y_priority = scales["y"]._priority if "y" in scales else 0
        return "y" if y_priority > x_priority else "x"

    def _plot(
        self,
        split_generator: Callable[[], Generator],
        scales: dict[str, Scale],
        orient: str,
    ) -> None:
        """Main interface for creating a plot; the base class only raises."""
        raise NotImplementedError()

    def _legend_artist(
        self, variables: list[str], value: Any, scales: dict[str, Scale],
    ) -> Artist | None:
        """Return the legend artist for this mark; the base class provides none."""
        return None
def resolve_properties(
    mark: Mark, data: DataFrame, scales: dict[str, Scale]
) -> dict[str, Any]:
    """Resolve every mappable property of `mark` against `data` and `scales`.

    Returns a dict keyed by property name, in the order the mark reports them,
    whose values are whatever `mark._resolve` yields for that name.
    """
    resolved = {}
    for name in mark._mappable_props:
        resolved[name] = mark._resolve(data, name, scales)
    return resolved
def resolve_color(
    mark: Mark,
    data: DataFrame | dict,
    prefix: str = "",
    scales: dict[str, Scale] | None = None,
) -> RGBATuple | ndarray:
    """
    Resolve a color feature together with its alpha into RGBA form.

    Color gets its own resolver because of its relationship with alpha: an
    alpha channel carried by the specified (or mapped) color is respected,
    while a separate `alpha` variable, which may itself be mapped, is applied
    otherwise. The same approach could later be extended to mapping specific
    color channels (i.e. luminance, chroma).

    Parameters
    ----------
    mark :
        Mark with the color property.
    data :
        Container with data values for features that will be semantically mapped.
    prefix :
        Support "color", "fillcolor", etc.
    scales :
        Mapping from variable to scale object, forwarded to `mark._resolve`.

    Returns
    -------
    The result of `mpl.colors.to_rgba` when the resolved color is a single
    sequence of floats, otherwise the result of `mpl.colors.to_rgba_array`.

    """
    color = mark._resolve(data, f"{prefix}color", scales)

    # Prefer the prefixed alpha when the mark declares one; else plain "alpha".
    alpha_name = f"{prefix}alpha"
    if alpha_name not in mark._mappable_props:
        alpha_name = "alpha"
    alpha = mark._resolve(data, alpha_name, scales)

    def is_visible(values, axis=None):
        """Detect "invisible" colors to set alpha appropriately."""
        # TODO First clause only needed to handle non-rgba arrays,
        # which we are trying to handle upstream
        if np.array(values).dtype.kind != "f":
            return True
        return np.isfinite(values).all(axis)

    # Second check here catches vectors of strings with identity scale
    # It could probably be handled better upstream. This is a tricky problem
    one_color = np.ndim(color) < 2 and all(isinstance(c, float) for c in color)

    if one_color:
        if len(color) == 4:
            # The color already carries its own alpha channel.
            return mpl.colors.to_rgba(color)
        if not is_visible(color):
            alpha = np.nan
        return mpl.colors.to_rgba(color, alpha)

    if np.ndim(color) == 2 and color.shape[1] == 4:
        # Every row already carries its own alpha channel.
        return mpl.colors.to_rgba_array(color)
    alpha = np.where(is_visible(color, axis=1), alpha, np.nan)
    return mpl.colors.to_rgba_array(color, alpha)

    # TODO should we be implementing fill here too?
    # (i.e. set fillalpha to 0 when fill=False)
def document_properties(mark):
    """Class decorator that splices a list of mappable properties into the docstring.

    The names of all fields of `mark` whose default is a Mappable are rendered
    as ``|name|`` entries, wrapped at 78 columns, and inserted after the first
    two lines of the existing docstring. The class is modified in place and
    returned.
    """
    mappable_names = []
    for spec in fields(mark):
        if isinstance(spec.default, Mappable):
            mappable_names.append(spec.name)

    indent = " " * 8
    listing = textwrap.fill(
        ", ".join(f"|{p}|" for p in mappable_names),
        width=78, initial_indent=indent, subsequent_indent=indent,
    )

    # NOTE(review): the header's leading whitespace is reproduced as found;
    # confirm it lines up with the docstring indentation of the decorated marks.
    inserted = [
        "",
        " This mark defines the following properties:",
        listing,
    ]

    doc_lines = mark.__doc__.split("\n")
    head, tail = doc_lines[:2], doc_lines[2:]
    mark.__doc__ = "\n".join(head + inserted + tail)
    return mark
|