categorical.py 117 KB


  1. from collections import namedtuple
  2. from textwrap import dedent
  3. import warnings
  4. from colorsys import rgb_to_hls
  5. from functools import partial
  6. import numpy as np
  7. import pandas as pd
  8. import matplotlib as mpl
  9. from matplotlib.collections import PatchCollection
  10. from matplotlib.patches import Rectangle
  11. import matplotlib.pyplot as plt
  12. from seaborn._core.typing import default, deprecated
  13. from seaborn._base import VectorPlotter, infer_orient, categorical_order
  14. from seaborn._stats.density import KDE
  15. from seaborn import utils
  16. from seaborn.utils import (
  17. desaturate,
  18. _check_argument,
  19. _draw_figure,
  20. _default_color,
  21. _get_patch_legend_artist,
  22. _get_transform_functions,
  23. _normalize_kwargs,
  24. _scatter_legend_artist,
  25. _version_predates,
  26. )
  27. from seaborn._compat import MarkerStyle
  28. from seaborn._statistics import EstimateAggregator, LetterValues
  29. from seaborn.palettes import light_palette
  30. from seaborn.axisgrid import FacetGrid, _facet_docs
  # Public names exported by a star-import of this module.
  __all__ = [
      "catplot",
      "stripplot",
      "swarmplot",
      "boxplot",
      "violinplot",
      "boxenplot",
      "pointplot",
      "barplot",
      "countplot",
  ]
  37. class _CategoricalPlotter(VectorPlotter):
  38. wide_structure = {"x": "@columns", "y": "@values", "hue": "@columns"}
  39. flat_structure = {"y": "@values"}
  40. _legend_attributes = ["color"]
  def __init__(
      self,
      data=None,
      variables={},
      order=None,
      orient=None,
      require_numeric=False,
      color=None,
      legend="auto",
  ):
      """Set up plot data, orientation, and the levels of the categorical axis.

      Parameters
      ----------
      data, variables
          Forwarded unchanged to the parent ``__init__``.
      order
          Passed to ``categorical_order`` to determine the levels of the
          variable on the ``orient`` axis.
      orient
          Requested orientation, resolved through ``infer_orient``. For
          wide-form input, ``"h"`` or ``"y"`` swaps the x and y variables.
      require_numeric
          Not read by this method; ``infer_orient`` is always called with
          ``require_numeric=False``.
      color
          When given with wide-form input, the implicit ``hue`` variable is
          removed from both ``plot_data`` and ``variables``.
      legend
          Stored on ``self.legend``.

      """
      super().__init__(data=data, variables=variables)

      # This method takes care of some bookkeeping that is necessary because the
      # original categorical plots (prior to the 2021 refactor) had some rules that
      # don't fit exactly into VectorPlotter logic. It may be wise to have a second
      # round of refactoring that moves the logic deeper, but this will keep things
      # relatively sensible for now.

      # For wide data, orient determines assignment to x/y differently from the
      # default VectorPlotter rules. If we do decide to make orient part of the
      # _base variable assignment, we'll want to figure out how to express that.
      if self.input_format == "wide" and orient in ["h", "y"]:
          self.plot_data = self.plot_data.rename(columns={"x": "y", "y": "x"})
          orig_variables = set(self.variables)
          orig_x = self.variables.pop("x", None)
          orig_y = self.variables.pop("y", None)
          orig_x_type = self.var_types.pop("x", None)
          orig_y_type = self.var_types.pop("y", None)
          if "x" in orig_variables:
              self.variables["y"] = orig_x
              self.var_types["y"] = orig_x_type
          if "y" in orig_variables:
              self.variables["x"] = orig_y
              self.var_types["x"] = orig_y_type

      # Initially there was more special code for wide-form data where plots were
      # multi-colored by default and then either palette or color could be used.
      # We want to provide backwards compatibility for this behavior in a relatively
      # simple way, so we delete the hue information when color is specified.
      if (
          self.input_format == "wide"
          and "hue" in self.variables
          and color is not None
      ):
          # DataFrame.drop returns a new frame; assign it back so that the
          # column is really removed rather than silently kept.
          self.plot_data = self.plot_data.drop("hue", axis=1)
          self.variables.pop("hue")

      # The concept of an "orientation" is important to the original categorical
      # plots, but there's no provision for it in VectorPlotter, so we need it here.
      # Note that it could be useful for the other functions in at least two ways
      # (orienting a univariate distribution plot from long-form data and selecting
      # the aggregation axis in lineplot), so we may want to eventually refactor it.
      self.orient = infer_orient(
          x=self.plot_data.get("x", None),
          y=self.plot_data.get("y", None),
          orient=orient,
          require_numeric=False,
      )

      self.legend = legend

      # Short-circuit in the case of an empty plot
      if not self.has_xy_data:
          return

      # Categorical plots can be "univariate" in which case they get an anonymous
      # category label on the opposite axis. Note: this duplicates code in the core
      # scale_categorical function. We need to do it here because of the next line.
      if self.orient not in self.variables:
          self.variables[self.orient] = None
          self.var_types[self.orient] = "categorical"
          self.plot_data[self.orient] = ""

      # Categorical variables have discrete levels that we need to track
      cat_levels = categorical_order(self.plot_data[self.orient], order)
      self.var_levels[self.orient] = cat_levels
  109. def _hue_backcompat(self, color, palette, hue_order, force_hue=False):
  110. """Implement backwards compatibility for hue parametrization.
  111. Note: the force_hue parameter is used so that functions can be shown to
  112. pass existing tests during refactoring and then tested for new behavior.
  113. It can be removed after completion of the work.
  114. """
  115. # The original categorical functions applied a palette to the categorical axis
  116. # by default. We want to require an explicit hue mapping, to be more consistent
  117. # with how things work elsewhere now. I don't think there's any good way to
  118. # do this gently -- because it's triggered by the default value of hue=None,
  119. # users would always get a warning, unless we introduce some sentinel "default"
  120. # argument for this change. That's possible, but asking users to set `hue=None`
  121. # on every call is annoying.
  122. # We are keeping the logic for implementing the old behavior in with the current
  123. # system so that (a) we can punt on that decision and (b) we can ensure that
  124. # refactored code passes old tests.
  125. default_behavior = color is None or palette is not None
  126. if force_hue and "hue" not in self.variables and default_behavior:
  127. self._redundant_hue = True
  128. self.plot_data["hue"] = self.plot_data[self.orient]
  129. self.variables["hue"] = self.variables[self.orient]
  130. self.var_types["hue"] = "categorical"
  131. hue_order = self.var_levels[self.orient]
  132. # Because we convert the categorical axis variable to string,
  133. # we need to update a dictionary palette too
  134. if isinstance(palette, dict):
  135. palette = {str(k): v for k, v in palette.items()}
  136. else:
  137. if "hue" in self.variables:
  138. redundant = (self.plot_data["hue"] == self.plot_data[self.orient]).all()
  139. else:
  140. redundant = False
  141. self._redundant_hue = redundant
  142. # Previously, categorical plots had a trick where color= could seed the palette.
  143. # Because that's an explicit parameterization, we are going to give it one
  144. # release cycle with a warning before removing.
  145. if "hue" in self.variables and palette is None and color is not None:
  146. if not isinstance(color, str):
  147. color = mpl.colors.to_hex(color)
  148. palette = f"dark:{color}"
  149. msg = (
  150. "\n\nSetting a gradient palette using color= is deprecated and will be "
  151. f"removed in v0.14.0. Set `palette='{palette}'` for the same effect.\n"
  152. )
  153. warnings.warn(msg, FutureWarning, stacklevel=3)
  154. return palette, hue_order
  155. def _palette_without_hue_backcompat(self, palette, hue_order):
  156. """Provide one cycle where palette= implies hue= when not provided"""
  157. if "hue" not in self.variables and palette is not None:
  158. msg = (
  159. "\n\nPassing `palette` without assigning `hue` is deprecated "
  160. f"and will be removed in v0.14.0. Assign the `{self.orient}` variable "
  161. "to `hue` and set `legend=False` for the same effect.\n"
  162. )
  163. warnings.warn(msg, FutureWarning, stacklevel=3)
  164. self.legend = False
  165. self.plot_data["hue"] = self.plot_data[self.orient]
  166. self.variables["hue"] = self.variables.get(self.orient)
  167. self.var_types["hue"] = self.var_types.get(self.orient)
  168. hue_order = self.var_levels.get(self.orient)
  169. self._var_levels.pop("hue", None)
  170. return hue_order
  def _point_kwargs_backcompat(self, scale, join, kwargs):
      """Provide two cycles where scale= and join= work, but redirect to kwargs.

      ``kwargs`` is updated in place: ``scale`` becomes line/marker sizes and
      ``join=False`` becomes an empty linestyle. A warning is issued for each
      deprecated parameter that was actually passed.
      """
      if scale is not deprecated:
          linewidth = mpl.rcParams["lines.linewidth"] * 1.8 * scale
          size_kws = dict(
              linewidth=linewidth,
              markeredgewidth=linewidth * .75,
              markersize=linewidth * 2,
          )
          scale_msg = (
              "\n\n"
              "The `scale` parameter is deprecated and will be removed in v0.15.0. "
              "You can now control the size of each plot element using matplotlib "
              "`Line2D` parameters (e.g., `linewidth`, `markersize`, etc.)."
              "\n"
          )
          warnings.warn(scale_msg, stacklevel=3)
          kwargs.update(size_kws)

      if join is not deprecated:
          parts = [
              "\n\n",
              "The `join` parameter is deprecated and will be removed in v0.15.0.",
          ]
          if not join:
              parts.append(
                  " You can remove the line between points with `linestyle='none'`."
              )
              kwargs.update(linestyle="")
          parts.append("\n")
          warnings.warn("".join(parts), stacklevel=3)
  def _err_kws_backcompat(self, err_kws, errcolor, errwidth, capsize):
      """Provide two cycles where existing signature-level err_kws are handled.

      Deprecated ``errcolor``/``errwidth`` values are copied into ``err_kws``
      (mutated in place) with a FutureWarning, and ``capsize=None`` is
      replaced by 0 with a FutureWarning.

      Returns
      -------
      err_kws, capsize
          The updated error-bar keywords and the resolved cap size.

      """
      # (parameter name, err_kws key, value) for each legacy parameter to check;
      # a None errcolor is skipped without warning
      legacy_params = []
      if errcolor is not None:
          legacy_params.append(("errcolor", "color", errcolor))
      legacy_params.append(("errwidth", "linewidth", errwidth))

      for name, key, val in legacy_params:
          if val is deprecated:
              continue
          suggest = f"err_kws={{'{key}': {val!r}}}"
          msg = (
              f"\n\nThe `{name}` parameter is deprecated. And will be removed "
              f"in v0.15.0. Pass `{suggest}` instead.\n"
          )
          warnings.warn(msg, FutureWarning, stacklevel=3)
          err_kws[key] = val

      if capsize is None:
          capsize = 0
          msg = (
              "\n\nPassing `capsize=None` is deprecated and will be removed "
              "in v0.15.0. Pass `capsize=0` to disable caps.\n"
          )
          warnings.warn(msg, FutureWarning, stacklevel=3)

      return err_kws, capsize
  def _violin_scale_backcompat(self, scale, scale_hue, density_norm, common_norm):
      """Provide two cycles of backcompat for the violin scaling kwargs.

      Parameters
      ----------
      scale
          Deprecated spelling of ``density_norm``; used in its place unless it
          is the ``deprecated`` sentinel.
      scale_hue
          Deprecated inverse of ``common_norm``; when passed, ``common_norm``
          becomes ``not scale_hue``, which is what the warning recommends.
      density_norm, common_norm
          Current parameters, returned unchanged when the corresponding
          deprecated parameter was not passed.

      Returns
      -------
      density_norm, common_norm
          The resolved values.

      """
      if scale is not deprecated:
          density_norm = scale
          msg = (
              "\n\nThe `scale` parameter has been renamed and will be removed "
              f"in v0.15.0. Pass `density_norm={scale!r}` for the same effect."
          )
          warnings.warn(msg, FutureWarning, stacklevel=3)

      if scale_hue is not deprecated:
          # The two flags have opposite meanings; apply the same inversion
          # that the warning message tells users to apply.
          common_norm = not scale_hue
          msg = (
              "\n\nThe `scale_hue` parameter has been replaced and will be removed "
              f"in v0.15.0. Pass `common_norm={not scale_hue}` for the same effect."
          )
          warnings.warn(msg, FutureWarning, stacklevel=3)

      return density_norm, common_norm
  def _violin_bw_backcompat(self, bw, bw_method):
      """Provide two cycles of backcompat for violin bandwidth parameterization.

      Returns ``bw`` (with a FutureWarning) when the deprecated parameter was
      passed, otherwise ``bw_method`` unchanged.
      """
      if bw is deprecated:
          return bw_method

      msg = dedent(f"""\n
          The `bw` parameter is deprecated in favor of `bw_method`/`bw_adjust`.
          Setting `bw_method={bw!r}`, but please see docs for the new parameters
          and update your code. This will become an error in seaborn v0.15.0.
      """)
      warnings.warn(msg, FutureWarning, stacklevel=3)
      return bw
  def _boxen_scale_backcompat(self, scale, width_method):
      """Provide two cycles of backcompat for the boxen ``scale`` kwarg.

      Returns ``scale`` (with a FutureWarning) when the deprecated parameter
      was passed, otherwise ``width_method`` unchanged.
      """
      if scale is not deprecated:
          width_method = scale
          # Close the inline-code backtick so the suggestion renders correctly
          msg = (
              "\n\nThe `scale` parameter has been renamed to `width_method` and "
              f"will be removed in v0.15. Pass `width_method={scale!r}`"
          )
          if scale == "area":
              msg += ", but note that the result for 'area' will appear different."
          else:
              msg += " for the same effect."
          warnings.warn(msg, FutureWarning, stacklevel=3)

      return width_method
  263. def _complement_color(self, color, base_color, hue_map):
  264. """Allow a color to be set automatically using a basis of comparison."""
  265. if color == "gray":
  266. msg = (
  267. 'Use "auto" to set automatic grayscale colors. From v0.14.0, '
  268. '"gray" will default to matplotlib\'s definition.'
  269. )
  270. warnings.warn(msg, FutureWarning, stacklevel=3)
  271. color = "auto"
  272. elif color is None or color is default:
  273. color = "auto"
  274. if color != "auto":
  275. return color
  276. if hue_map.lookup_table is None:
  277. if base_color is None:
  278. return None
  279. basis = [mpl.colors.to_rgb(base_color)]
  280. else:
  281. basis = [mpl.colors.to_rgb(c) for c in hue_map.lookup_table.values()]
  282. unique_colors = np.unique(basis, axis=0)
  283. light_vals = [rgb_to_hls(*rgb[:3])[1] for rgb in unique_colors]
  284. lum = min(light_vals) * .6
  285. return (lum, lum, lum)
  def _map_prop_with_hue(self, name, value, fallback, plot_kws):
      """Support pointplot behavior of modifying the marker/linestyle with hue.

      When ``value`` is the ``default`` sentinel it is taken (and removed)
      from ``plot_kws[name]``, falling back to ``fallback``.

      Returns
      -------
      mapping : dict
          Maps each hue level to its property value (list values are paired
          with levels in order), or ``{None: value}`` when there is no hue.

      """
      if value is default:
          value = plot_kws.pop(name, fallback)

      if "hue" not in self.variables:
          return {None: value}

      levels = self._hue_map.levels
      if isinstance(value, list):
          return dict(zip(levels, value))
      return dict.fromkeys(levels, value)
  299. def _adjust_cat_axis(self, ax, axis):
  300. """Set ticks and limits for a categorical variable."""
  301. # Note: in theory, this could happen in _attach for all categorical axes
  302. # But two reasons not to do that:
  303. # - If it happens before plotting, autoscaling messes up the plot limits
  304. # - It would change existing plots from other seaborn functions
  305. if self.var_types[axis] != "categorical":
  306. return
  307. # If both x/y data are empty, the correct way to set up the plot is
  308. # somewhat undefined; because we don't add null category data to the plot in
  309. # this case we don't *have* a categorical axis (yet), so best to just bail.
  310. if self.plot_data[axis].empty:
  311. return
  312. # We can infer the total number of categories (including those from previous
  313. # plots that are not part of the plot we are currently making) from the number
  314. # of ticks, which matplotlib sets up while doing unit conversion. This feels
  315. # slightly risky, as if we are relying on something that may be a matplotlib
  316. # implementation detail. But I cannot think of a better way to keep track of
  317. # the state from previous categorical calls (see GH2516 for context)
  318. n = len(getattr(ax, f"get_{axis}ticks")())
  319. if axis == "x":
  320. ax.xaxis.grid(False)
  321. ax.set_xlim(-.5, n - .5, auto=None)
  322. else:
  323. ax.yaxis.grid(False)
  324. # Note limits that correspond to previously-inverted y axis
  325. ax.set_ylim(n - .5, -.5, auto=None)
  326. def _dodge_needed(self):
  327. """Return True when use of `hue` would cause overlaps."""
  328. groupers = list({self.orient, "col", "row"} & set(self.variables))
  329. if "hue" in self.variables:
  330. orient = self.plot_data[groupers].value_counts()
  331. paired = self.plot_data[[*groupers, "hue"]].value_counts()
  332. return orient.size != paired.size
  333. return False
  334. def _dodge(self, keys, data):
  335. """Apply a dodge transform to coordinates in place."""
  336. hue_idx = self._hue_map.levels.index(keys["hue"])
  337. n = len(self._hue_map.levels)
  338. data["width"] /= n
  339. full_width = data["width"] * n
  340. offset = data["width"] * hue_idx + data["width"] / 2 - full_width / 2
  341. data[self.orient] += offset
  def _invert_scale(self, ax, data, vars=("x", "y")):
      """Undo scaling after computation so data are plotted correctly.

      For each variable, the inverse transform of the matching axis is applied
      in place to the ``<var>``, ``<var>min`` and ``<var>max`` columns that
      exist in ``data``. On the ``orient`` variable, a ``"width"`` column is
      first converted into ``"edge"``/``"width"`` in inverted units.
      """
      for var in vars:
          _, inverse = _get_transform_functions(ax, var[0])

          if var == self.orient and "width" in data:
              half_width = data["width"] / 2
              # Compute both edges before `data[var]` itself is inverted below
              data["edge"] = inverse(data[var] - half_width)
              upper_edge = inverse(data[var] + half_width)
              data["width"] = upper_edge - data["edge"].to_numpy()

          for suffix in ["", "min", "max"]:
              column = f"{var}{suffix}"
              if column in data:
                  data[column] = inverse(data[column])
  353. def _configure_legend(self, ax, func, common_kws=None, semantic_kws=None):
  354. if self.legend == "auto":
  355. show_legend = not self._redundant_hue and self.input_format != "wide"
  356. else:
  357. show_legend = bool(self.legend)
  358. if show_legend:
  359. self.add_legend_data(ax, func, common_kws, semantic_kws=semantic_kws)
  360. handles, _ = ax.get_legend_handles_labels()
  361. if handles:
  362. ax.legend(title=self.legend_title)
  363. @property
  364. def _native_width(self):
  365. """Return unit of width separating categories on native numeric scale."""
  366. # Categorical data always have a unit width
  367. if self.var_types[self.orient] == "categorical":
  368. return 1
  369. # Otherwise, define the width as the smallest space between observations
  370. unique_values = np.unique(self.comp_data[self.orient])
  371. if len(unique_values) > 1:
  372. native_width = np.nanmin(np.diff(unique_values))
  373. else:
  374. native_width = 1
  375. return native_width
  376. def _nested_offsets(self, width, dodge):
  377. """Return offsets for each hue level for dodged plots."""
  378. offsets = None
  379. if "hue" in self.variables and self._hue_map.levels is not None:
  380. n_levels = len(self._hue_map.levels)
  381. if dodge:
  382. each_width = width / n_levels
  383. offsets = np.linspace(0, width - each_width, n_levels)
  384. offsets -= offsets.mean()
  385. else:
  386. offsets = np.zeros(n_levels)
  387. return offsets
  388. # Note that the plotting methods here aim (in most cases) to produce the
  389. # exact same artists as the original (pre 0.12) version of the code, so
  390. # there is some weirdness that might not otherwise be clean or make sense in
  391. # this context, such as adding empty artists for combinations of variables
  392. # with no observations
  def plot_strips(
      self,
      jitter,
      dodge,
      color,
      plot_kws,
  ):
      """Draw a scatter of observations per category, with optional jitter.

      Parameters
      ----------
      jitter
          ``True`` uses a jitter half-range of 0.1; any other value is
          converted with ``float``. The range is divided by the number of hue
          levels when dodging and scaled by the native width.
      dodge
          When truthy, hue levels are iterated separately and offset.
      color
          Passed to ``ax.scatter``; face colors are replaced by the hue
          mapping when a hue variable is assigned.
      plot_kws : dict
          Extra ``ax.scatter`` keywords. ``"edgecolor"`` is removed in place
          when the marker is not filled.

      """
      unit = self._native_width
      width = .8 * unit
      offsets = self._nested_offsets(width, dodge)

      jlim = 0.1 if jitter is True else float(jitter)
      if "hue" in self.variables and dodge and self._hue_map.levels is not None:
          jlim /= len(self._hue_map.levels)
      jlim *= unit
      jitterer = partial(np.random.uniform, low=-jlim, high=+jlim)

      iter_vars = [self.orient, "hue"] if dodge else [self.orient]

      ax = self.ax
      dodge_move = jitter_move = 0

      if "marker" in plot_kws and not MarkerStyle(plot_kws["marker"]).is_filled():
          plot_kws.pop("edgecolor", None)

      # Offsets only matter when at least one of them is nonzero
      apply_dodge = offsets is not None and (offsets != 0).any()

      for sub_vars, sub_data in self.iter_data(iter_vars,
                                               from_comp_data=True,
                                               allow_empty=True):

          ax = self._get_axes(sub_vars)

          if apply_dodge:
              level_idx = sub_data["hue"].map(self._hue_map.levels.index)
              dodge_move = offsets[level_idx]

          # A lone observation is left exactly at its position
          n_obs = len(sub_data)
          jitter_move = jitterer(size=n_obs) if n_obs > 1 else 0

          sub_data[self.orient] = sub_data[self.orient] + dodge_move + jitter_move
          self._invert_scale(ax, sub_data)

          points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
          if "hue" in self.variables:
              points.set_facecolors(self._hue_map(sub_data["hue"]))

      self._configure_legend(ax, _scatter_legend_artist, common_kws=plot_kws)
  def plot_swarms(
      self,
      dodge,
      color,
      warn_thresh,
      plot_kws,
  ):
      """Draw a scatter per category whose points are swarmed at draw time.

      Parameters
      ----------
      dodge
          When truthy, hue levels are iterated separately and offset.
      color
          Passed to ``ax.scatter``; face colors are replaced by the hue
          mapping when a hue variable is assigned.
      warn_thresh
          Forwarded to ``Beeswarm``.
      plot_kws : dict
          Extra ``ax.scatter`` keywords. ``"edgecolor"`` is removed in place
          when the marker is not filled.

      """
      width = .8 * self._native_width
      offsets = self._nested_offsets(width, dodge)

      iter_vars = [self.orient]
      if dodge:
          iter_vars.append("hue")

      ax = self.ax
      point_collections = {}
      dodge_move = 0

      if "marker" in plot_kws and not MarkerStyle(plot_kws["marker"]).is_filled():
          plot_kws.pop("edgecolor", None)

      for sub_vars, sub_data in self.iter_data(iter_vars,
                                               from_comp_data=True,
                                               allow_empty=True):

          ax = self._get_axes(sub_vars)

          if offsets is not None:
              dodge_move = offsets[sub_data["hue"].map(self._hue_map.levels.index)]

          if not sub_data.empty:
              sub_data[self.orient] = sub_data[self.orient] + dodge_move

          self._invert_scale(ax, sub_data)

          points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
          if "hue" in self.variables:
              points.set_facecolors(self._hue_map(sub_data["hue"]))

          if not sub_data.empty:
              point_collections[(ax, sub_data[self.orient].iloc[0])] = points

      beeswarm = Beeswarm(width=width, orient=self.orient, warn_thresh=warn_thresh)
      for (ax, center), points in point_collections.items():
          if points.get_offsets().shape[0] > 1:

              # Bind `center` AND `ax` as defaults: the function runs later, at
              # draw time, when the loop variables refer to the last
              # collection. Without binding `ax`, every collection would
              # update the data limits of the final axes.
              def draw(points, renderer, *, center=center, ax=ax):

                  beeswarm(points, center)

                  if self.orient == "y":
                      scalex = False
                      scaley = ax.get_autoscaley_on()
                  else:
                      scalex = ax.get_autoscalex_on()
                      scaley = False

                  # This prevents us from undoing the nice categorical axis limits
                  # set in _adjust_cat_axis, because that method currently leaves
                  # the autoscale flag in its original setting. It may be better
                  # to disable autoscaling there to avoid needing to do this.
                  fixed_scale = self.var_types[self.orient] == "categorical"
                  ax.update_datalim(points.get_datalim(ax.transData))
                  if not fixed_scale and (scalex or scaley):
                      ax.autoscale_view(scalex=scalex, scaley=scaley)

                  super(points.__class__, points).draw(renderer)

              # Replace the collection's draw method with the swarming version
              points.draw = draw.__get__(points)

      _draw_figure(ax.figure)
      self._configure_legend(ax, _scatter_legend_artist, plot_kws)
  def plot_boxes(
      self,
      width,
      dodge,
      gap,
      fill,
      whis,
      color,
      linecolor,
      linewidth,
      fliersize,
      plot_kws,  # TODO rename user_kws?
  ):
      """Draw box plots with ``Axes.bxp``, one call per hue level.

      Parameters
      ----------
      width
          Multiplied by the native width to get the full width of a box.
      dodge
          When truthy, boxes are narrowed and shifted by hue level.
      gap
          When truthy, box widths are scaled by ``1 - gap``.
      fill
          Draw filled patches (True) or line-only boxes (False).
      whis
          Forwarded to ``mpl.cbook.boxplot_stats``.
      color
          Main color used when the subset has no hue key.
      linecolor
          Color of edges, medians, whiskers, fliers and caps when ``fill``.
      linewidth
          Default line width for box, median, whisker and cap artists; when
          None and not ``fill``, the ``lines.linewidth`` rcParam is used.
      fliersize
          Default marker size for fliers.
      plot_kws : dict
          Extra ``Axes.bxp`` keywords. Mutated in place: the ``*props``
          entries and ``notch`` are popped and ``shownotches`` is set.

      """
      iter_vars = ["hue"]
      value_var = {"x": "y", "y": "x"}[self.orient]

      def get_props(element, artist=mpl.lines.Line2D):
          return _normalize_kwargs(plot_kws.pop(f"{element}props", {}), artist)

      if not fill and linewidth is None:
          linewidth = mpl.rcParams["lines.linewidth"]

      plot_kws.setdefault("shownotches", plot_kws.pop("notch", False))

      box_artist = mpl.patches.Rectangle if fill else mpl.lines.Line2D
      props = {
          "box": get_props("box", box_artist),
          "median": get_props("median"),
          "whisker": get_props("whisker"),
          "flier": get_props("flier"),
          "cap": get_props("cap"),
      }

      props["median"].setdefault("solid_capstyle", "butt")
      props["whisker"].setdefault("solid_capstyle", "butt")
      props["flier"].setdefault("markersize", fliersize)

      ax = self.ax

      for sub_vars, sub_data in self.iter_data(iter_vars,
                                               from_comp_data=True,
                                               allow_empty=False):

          ax = self._get_axes(sub_vars)

          # Collect positions and values in one pass over the groups so they
          # stay aligned; this avoids the deprecated `GroupBy.grouper`.
          grouped = sub_data.groupby(self.orient)[value_var]
          group_keys = []
          value_data = []
          for key, values in grouped:
              group_keys.append(key)
              value_data.append(values.to_numpy())
          stats = pd.DataFrame(mpl.cbook.boxplot_stats(value_data, whis=whis))
          positions = np.asarray(group_keys, dtype=float)

          orig_width = width * self._native_width
          data = pd.DataFrame({self.orient: positions, "width": orig_width})
          if dodge:
              self._dodge(sub_vars, data)
          if gap:
              data["width"] *= 1 - gap
          capwidth = plot_kws.get("capwidths", 0.5 * data["width"])

          self._invert_scale(ax, data)
          _, inv = _get_transform_functions(ax, value_var)
          for stat in ["mean", "med", "q1", "q3", "cilo", "cihi", "whislo", "whishi"]:
              stats[stat] = inv(stats[stat])
          stats["fliers"] = stats["fliers"].map(inv)

          linear_orient_scale = getattr(ax, f"get_{self.orient}scale")() == "linear"

          maincolor = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
          if fill:
              boxprops = {
                  "facecolor": maincolor, "edgecolor": linecolor, **props["box"]
              }
              medianprops = {"color": linecolor, **props["median"]}
              whiskerprops = {"color": linecolor, **props["whisker"]}
              flierprops = {"markeredgecolor": linecolor, **props["flier"]}
              capprops = {"color": linecolor, **props["cap"]}
          else:
              boxprops = {"color": maincolor, **props["box"]}
              medianprops = {"color": maincolor, **props["median"]}
              whiskerprops = {"color": maincolor, **props["whisker"]}
              flierprops = {"markeredgecolor": maincolor, **props["flier"]}
              capprops = {"color": maincolor, **props["cap"]}

          if linewidth is not None:
              for prop_dict in [boxprops, medianprops, whiskerprops, capprops]:
                  prop_dict.setdefault("linewidth", linewidth)

          default_kws = dict(
              bxpstats=stats.to_dict("records"),
              positions=data[self.orient],
              # Set width to 0 to avoid going out of domain
              widths=data["width"] if linear_orient_scale else 0,
              patch_artist=fill,
              vert=self.orient == "x",
              manage_ticks=False,
              boxprops=boxprops,
              medianprops=medianprops,
              whiskerprops=whiskerprops,
              flierprops=flierprops,
              capprops=capprops,
              # `capwidths` was added in matplotlib 3.6.0, so only pass it
              # when the installed version supports it
              **(
                  {} if _version_predates(mpl, "3.6.0")
                  else {"capwidths": capwidth}
              )
          )
          boxplot_kws = {**default_kws, **plot_kws}
          artists = ax.bxp(**boxplot_kws)

          # Reset artist widths after adding so everything stays positive
          ori_idx = ["x", "y"].index(self.orient)

          if not linear_orient_scale:
              for i, box in enumerate(data.to_dict("records")):
                  p0 = box["edge"]
                  p1 = box["edge"] + box["width"]

                  if artists["boxes"]:
                      box_artist = artists["boxes"][i]
                      if fill:
                          box_verts = box_artist.get_path().vertices.T
                      else:
                          box_verts = box_artist.get_data()
                      box_verts[ori_idx][0] = p0
                      box_verts[ori_idx][3:] = p0
                      box_verts[ori_idx][1:3] = p1
                      if not fill:
                          # When fill is True, the data get changed in place
                          box_artist.set_data(box_verts)
                      ax.update_datalim(
                          np.transpose(box_verts),
                          updatex=self.orient == "x",
                          updatey=self.orient == "y",
                      )

                  if artists["medians"]:
                      verts = artists["medians"][i].get_xydata().T
                      verts[ori_idx][:] = p0, p1
                      artists["medians"][i].set_data(verts)

                  if artists["caps"]:
                      f_fwd, f_inv = _get_transform_functions(ax, self.orient)
                      # Two cap lines are drawn per box
                      for line in artists["caps"][2 * i:2 * i + 2]:
                          p0 = f_inv(f_fwd(box[self.orient]) - capwidth[i] / 2)
                          p1 = f_inv(f_fwd(box[self.orient]) + capwidth[i] / 2)
                          verts = line.get_xydata().T
                          verts[ori_idx][:] = p0, p1
                          line.set_data(verts)

          ax.add_container(BoxPlotContainer(artists))

      legend_artist = _get_patch_legend_artist(fill)
      self._configure_legend(ax, legend_artist, boxprops)
  def plot_boxens(
      self,
      width,
      dodge,
      gap,
      fill,
      color,
      linecolor,
      linewidth,
      width_method,
      k_depth,
      outlier_prop,
      trust_alpha,
      showfliers,
      box_kws,
      flier_kws,
      line_kws,
      plot_kws,
  ):
      """Draw letter-value ("boxen") plots for each category and hue level.

      Parameters
      ----------
      width
          Multiplied by the native width to get the widest box of a group.
      dodge, gap
          Hue dodging flag and fractional shrink applied to the width.
      fill
          Draw filled boxes shaded by depth (True) or outlines only (False).
      color, linecolor, linewidth
          Main color without hue, edge/median color when filled, and line
          width (derived from the ``lines.linewidth`` rcParam when None).
      width_method
          One of ``"exponential"``, ``"linear"``, ``"area"``.
      k_depth, outlier_prop, trust_alpha
          Forwarded to ``LetterValues``.
      showfliers
          Whether to scatter the outlying observations.
      box_kws, flier_kws, line_kws, plot_kws
          Extra keywords for the box collection (``plot_kws`` merged with
          ``box_kws``), the flier scatter, and the median line.

      """
      iter_vars = [self.orient, "hue"]
      value_var = {"x": "y", "y": "x"}[self.orient]

      estimator = LetterValues(k_depth, outlier_prop, trust_alpha)

      _check_argument(
          "width_method", ["exponential", "linear", "area"], width_method
      )

      box_kws = plot_kws if box_kws is None else {**plot_kws, **box_kws}
      flier_kws = {} if flier_kws is None else flier_kws.copy()
      line_kws = {} if line_kws is None else line_kws.copy()

      if linewidth is None:
          rc_linewidth = mpl.rcParams["lines.linewidth"]
          linewidth = 0.5 * rc_linewidth if fill else rc_linewidth

      ax = self.ax

      for sub_vars, sub_data in self.iter_data(iter_vars,
                                               from_comp_data=True,
                                               allow_empty=False):

          ax = self._get_axes(sub_vars)
          _, inv_ori = _get_transform_functions(ax, self.orient)
          _, inv_val = _get_transform_functions(ax, value_var)

          # Statistics
          lv_data = estimator(sub_data[value_var])
          depth = lv_data["k"]
          n_boxes = depth * 2 - 1
          lv_values = lv_data["values"]

          pos_data = pd.DataFrame({
              self.orient: [sub_vars[self.orient]],
              "width": [width * self._native_width],
          })
          if dodge:
              self._dodge(sub_vars, pos_data)
          if gap:
              pos_data["width"] *= 1 - gap

          # Letter-value boxes
          levels = lv_data["levels"]
          exponent = (levels - 1 - depth).astype(float)
          if width_method == "linear":
              rel_widths = levels + 1
          elif width_method == "exponential":
              rel_widths = 2 ** exponent
          elif width_method == "area":
              tails = levels < (depth - 1)
              rel_widths = 2 ** (exponent - tails) / np.diff(lv_values)

          center = pos_data[self.orient].item()
          group_width = pos_data["width"].item()
          widths = rel_widths / rel_widths.max() * group_width

          box_vals = inv_val(lv_values)
          box_pos = inv_ori(center - widths / 2)
          box_heights = inv_val(lv_values[1:]) - inv_val(lv_values[:-1])
          box_widths = inv_ori(center + widths / 2) - box_pos

          if "hue" in sub_vars:
              maincolor = self._hue_map(sub_vars["hue"])
          else:
              maincolor = color
          flier_colors = {
              "facecolor": "none", "edgecolor": ".45" if fill else maincolor
          }
          if fill:
              cmap = light_palette(maincolor, as_cmap=True)
              boxcolors = cmap(2 ** ((exponent + 2) / 3))
              box_colors = {"facecolors": boxcolors, "edgecolors": linecolor}
          else:
              boxcolors = maincolor
              box_colors = {"facecolors": "none", "edgecolors": boxcolors}

          boxen = []
          for i in range(n_boxes):
              corner = (box_pos[i], box_vals[i])
              extent = (box_widths[i], box_heights[i])
              if self.orient != "x":
                  # Horizontal boxes swap the roles of the two axes
                  corner, extent = corner[::-1], extent[::-1]
              boxen.append(Rectangle(corner, *extent))

          collection_kws = {**box_colors, "linewidth": linewidth, **box_kws}
          ax.add_collection(PatchCollection(boxen, **collection_kws), autolim=False)
          ax.update_datalim(
              np.column_stack([box_vals, box_vals]),
              updatex=self.orient == "y",
              updatey=self.orient == "x",
          )

          # Median line
          med = lv_data["median"]
          half = group_width / 2
          ori_span = inv_ori([center - half, center + half])
          val_span = inv_val([med, med])
          if self.orient == "x":
              x, y = ori_span, val_span
          else:
              x, y = val_span, ori_span
          median_kws = {
              "color": linecolor if fill else maincolor,
              "solid_capstyle": "butt",
              "linewidth": 1.25 * linewidth,
          }
          ax.plot(x, y, **{**median_kws, **line_kws})

          # Outliers ("fliers")
          if showfliers:
              flier_vals = inv_val(lv_data["fliers"])
              flier_pos = np.full(len(flier_vals), inv_ori(center))
              if self.orient == "x":
                  x, y = flier_pos, flier_vals
              else:
                  x, y = flier_vals, flier_pos
              ax.scatter(x, y, **{**flier_colors, "s": 25, **flier_kws})

      ax.autoscale_view(scalex=self.orient == "y", scaley=self.orient == "x")

      legend_artist = _get_patch_legend_artist(fill)
      common_kws = {**box_kws, "linewidth": linewidth, "edgecolor": linecolor}
      self._configure_legend(ax, legend_artist, common_kws)
    def plot_violins(
        self,
        width,
        dodge,
        gap,
        split,
        color,
        fill,
        linecolor,
        linewidth,
        inner,
        density_norm,
        common_norm,
        kde_kws,
        inner_kws,
        plot_kws,
    ):
        """Draw a violin (KDE outline) for each data subset, with optional inner marks.

        The work happens in two passes: the first fits a KDE to every subset
        produced by iterating over the orient and hue variables, the second
        normalizes those densities against each other and draws them.

        Parameters
        ----------
        width : number
            Multiplied by `self._native_width` to get the full violin width.
        dodge : bool
            When truthy, `self._dodge` adjusts the positional data per subset.
        gap : number
            When truthy, widths are scaled by `1 - gap`.
        split : bool
            When truthy, the density is drawn on one side only; the side is
            chosen by the parity of the subset's index in the hue levels.
        color : color
            Main color for subsets that carry no hue value.
        fill : bool
            When falsy, the face color is "none" and the main color is used
            for the edges instead.
        linecolor : color
            Edge color for filled violins.
        linewidth : number or None
            Line width; `None` derives a default from matplotlib rcParams.
        inner : {"box", "quart", "stick", "point", None}
            Interior representation, validated with `_check_argument`.
        density_norm : {"area", "count", "width"}
            Rule for converting density into violin extent.
        common_norm : bool
            When truthy, one maximum density/count is shared by all violins;
            otherwise maxima are taken among violins that agree on every
            variable except the orient variable.
        kde_kws : dict
            Keyword arguments for the `KDE` constructor.
        inner_kws : dict
            Keyword arguments for the inner artists. With a "box" inner, the
            `box_width`, `whis_width` and `marker` keys are popped from it,
            so the dictionary is modified in place.
        plot_kws : dict
            Keyword arguments for the violin body; they override the defaults.

        """
        iter_vars = [self.orient, "hue"]
        value_var = {"x": "y", "y": "x"}[self.orient]

        inner_options = ["box", "quart", "stick", "point", None]
        _check_argument("inner", inner_options, inner, prefix=True)
        _check_argument("density_norm", ["area", "count", "width"], density_norm)

        if linewidth is None:
            if fill:
                linewidth = 1.25 * mpl.rcParams["patch.linewidth"]
            else:
                linewidth = mpl.rcParams["lines.linewidth"]

        # These keys are only meaningful for the miniature box plot, so they are
        # removed from inner_kws before it is forwarded to matplotlib
        if inner is not None and inner.startswith("box"):
            box_width = inner_kws.pop("box_width", linewidth * 4.5)
            whis_width = inner_kws.pop("whis_width", box_width / 3)
            marker = inner_kws.pop("marker", "_" if self.orient == "x" else "|")

        kde = KDE(**kde_kws)
        ax = self.ax
        violin_data = []

        # Iterate through all the data splits once to compute the KDEs
        for sub_vars, sub_data in self.iter_data(iter_vars,
                                                 from_comp_data=True,
                                                 allow_empty=False):

            # Copy an optional "weights" column to "weight" (1 when absent)
            # NOTE(review): presumably the column name the KDE stat reads -- confirm
            sub_data["weight"] = sub_data.get("weights", 1)
            stat_data = kde._transform(sub_data, value_var, [])

            maincolor = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
            if not fill:
                # Line-art mode: the outline takes the main color
                linecolor = maincolor
                maincolor = "none"
            default_kws = dict(
                facecolor=maincolor,
                edgecolor=linecolor,
                linewidth=linewidth,
            )

            violin_data.append({
                "position": sub_vars[self.orient],
                "observations": sub_data[value_var],
                "density": stat_data["density"],
                "support": stat_data[value_var],
                "kwargs": {**default_kws, **plot_kws},
                "sub_vars": sub_vars,
                "ax": self._get_axes(sub_vars),
            })

        # Once we've computed all the KDEs, get statistics for normalization
        def vars_to_key(sub_vars):
            # Hashable identity of a subset, ignoring its orient-axis position
            return tuple((k, v) for k, v in sub_vars.items() if k != self.orient)

        norm_keys = [vars_to_key(violin["sub_vars"]) for violin in violin_data]
        if common_norm:
            common_max_density = np.nanmax([v["density"].max() for v in violin_data])
            common_max_count = np.nanmax([len(v["observations"]) for v in violin_data])
            max_density = {key: common_max_density for key in norm_keys}
            max_count = {key: common_max_count for key in norm_keys}
        else:
            with warnings.catch_warnings():
                # Ignore warning when all violins are singular; it's not important
                warnings.filterwarnings('ignore', "All-NaN (slice|axis) encountered")
                max_density = {
                    key: np.nanmax([
                        v["density"].max() for v in violin_data
                        if vars_to_key(v["sub_vars"]) == key
                    ]) for key in norm_keys
                }
            max_count = {
                key: np.nanmax([
                    len(v["observations"]) for v in violin_data
                    if vars_to_key(v["sub_vars"]) == key
                ]) for key in norm_keys
            }

        real_width = width * self._native_width

        # Now iterate through the violins again to apply the normalization and plot
        for violin in violin_data:

            # At least one row, so a position is available even without support
            index = pd.RangeIndex(0, max(len(violin["support"]), 1))
            data = pd.DataFrame({
                self.orient: violin["position"],
                value_var: violin["support"],
                "density": violin["density"],
                "width": real_width,
            }, index=index)

            if dodge:
                self._dodge(violin["sub_vars"], data)
            if gap:
                data["width"] *= 1 - gap

            # Normalize the density across the distribution(s) and relative to the width
            norm_key = vars_to_key(violin["sub_vars"])
            hw = data["width"] / 2
            peak_density = violin["density"].max()
            if np.isnan(peak_density):
                span = 1
            elif density_norm == "area":
                span = data["density"] / max_density[norm_key]
            elif density_norm == "count":
                count = len(violin["observations"])
                span = data["density"] / peak_density * (count / max_count[norm_key])
            elif density_norm == "width":
                span = data["density"] / peak_density
            # A split violin gives the whole width to a single side
            span = span * hw * (2 if split else 1)

            # Handle split violins (i.e. asymmetric spans)
            right_side = (
                0 if "hue" not in self.variables
                else self._hue_map.levels.index(violin["sub_vars"]["hue"]) % 2
            )
            if split:
                offsets = (hw, span - hw) if right_side else (span - hw, hw)
            else:
                offsets = span, span

            ax = violin["ax"]
            _, invx = _get_transform_functions(ax, "x")
            _, invy = _get_transform_functions(ax, "y")
            inv_pos = {"x": invx, "y": invy}[self.orient]
            inv_val = {"x": invx, "y": invy}[value_var]

            # Rebinds `linecolor`: the value from the last violin drawn is the
            # one handed to the legend configuration at the end of the method
            linecolor = violin["kwargs"]["edgecolor"]

            # Handle singular datasets (one or more observations with no variance)
            if np.isnan(peak_density):
                pos = data[self.orient].iloc[0]
                val = violin["observations"].mean()
                if self.orient == "x":
                    x, y = [pos - offsets[0], pos + offsets[1]], [val, val]
                else:
                    x, y = [val, val], [pos - offsets[0], pos + offsets[1]]
                ax.plot(invx(x), invy(y), color=linecolor, linewidth=linewidth)
                continue

            # Plot the main violin body
            plot_func = {"x": ax.fill_betweenx, "y": ax.fill_between}[self.orient]
            plot_func(
                inv_val(data[value_var]),
                inv_pos(data[self.orient] - offsets[0]),
                inv_pos(data[self.orient] + offsets[1]),
                **violin["kwargs"]
            )

            # Adjust the observation data
            obs = violin["observations"]
            pos_dict = {self.orient: violin["position"], "width": real_width}
            if dodge:
                self._dodge(violin["sub_vars"], pos_dict)
            if gap:
                pos_dict["width"] *= (1 - gap)

            # --- Plot the inner components
            if inner is None:
                continue

            elif inner.startswith("point"):
                # One marker per observation, at the violin's position
                pos = np.array([pos_dict[self.orient]] * len(obs))
                if split:
                    pos += (-1 if right_side else 1) * pos_dict["width"] / 2
                x, y = (pos, obs) if self.orient == "x" else (obs, pos)
                kws = {
                    "color": linecolor,
                    "edgecolor": linecolor,
                    "s": (linewidth * 2) ** 2,
                    "zorder": violin["kwargs"].get("zorder", 2) + 1,
                    **inner_kws,
                }
                ax.scatter(invx(x), invy(y), **kws)

            elif inner.startswith("stick"):
                # One line per observation, spanning the violin outline there
                pos0 = np.interp(obs, data[value_var], data[self.orient] - offsets[0])
                pos1 = np.interp(obs, data[value_var], data[self.orient] + offsets[1])
                pos_pts = np.stack([inv_pos(pos0), inv_pos(pos1)])
                val_pts = np.stack([inv_val(obs), inv_val(obs)])
                segments = np.stack([pos_pts, val_pts]).transpose(2, 1, 0)
                if self.orient == "y":
                    segments = segments[:, :, ::-1]
                kws = {
                    "color": linecolor,
                    "linewidth": linewidth / 2,
                    **inner_kws,
                }
                lines = mpl.collections.LineCollection(segments, **kws)
                ax.add_collection(lines, autolim=False)

            elif inner.startswith("quart"):
                # Dashed lines at the 25th, 50th and 75th percentiles
                stats = np.percentile(obs, [25, 50, 75])
                pos0 = np.interp(stats, data[value_var], data[self.orient] - offsets[0])
                pos1 = np.interp(stats, data[value_var], data[self.orient] + offsets[1])
                pos_pts = np.stack([inv_pos(pos0), inv_pos(pos1)])
                val_pts = np.stack([inv_val(stats), inv_val(stats)])
                segments = np.stack([pos_pts, val_pts]).transpose(2, 0, 1)
                if self.orient == "y":
                    segments = segments[:, ::-1, :]
                dashes = [(1.25, .75), (2.5, 1), (1.25, .75)]
                for i, segment in enumerate(segments):
                    kws = {
                        "color": linecolor,
                        "linewidth": linewidth,
                        "dashes": dashes[i],
                        **inner_kws,
                    }
                    ax.plot(*segment, **kws)

            elif inner.startswith("box"):
                # Miniature box plot: thin whisker line, thick box line, median mark
                stats = mpl.cbook.boxplot_stats(obs)[0]
                pos = np.array(pos_dict[self.orient])
                if split:
                    pos += (-1 if right_side else 1) * pos_dict["width"] / 2
                pos = [pos, pos], [pos, pos], [pos]
                val = (
                    [stats["whislo"], stats["whishi"]],
                    [stats["q1"], stats["q3"]],
                    [stats["med"]]
                )
                if self.orient == "x":
                    (x0, x1, x2), (y0, y1, y2) = pos, val
                else:
                    (x0, x1, x2), (y0, y1, y2) = val, pos

                if split:
                    # Shift by half the box width; box_width is in points, 72 per inch
                    offset = (1 if right_side else -1) * box_width / 72 / 2
                    dx, dy = (offset, 0) if self.orient == "x" else (0, -offset)
                    trans = ax.transData + mpl.transforms.ScaledTranslation(
                        dx, dy, ax.figure.dpi_scale_trans,
                    )
                else:
                    trans = ax.transData
                line_kws = {
                    "color": linecolor,
                    "transform": trans,
                    **inner_kws,
                    "linewidth": whis_width,
                }
                ax.plot(invx(x0), invy(y0), **line_kws)
                line_kws["linewidth"] = box_width
                ax.plot(invx(x1), invy(y1), **line_kws)
                dot_kws = {
                    "marker": marker,
                    "markersize": box_width / 1.2,
                    "markeredgewidth": box_width / 5,
                    "transform": trans,
                    **inner_kws,
                    "markeredgecolor": "w",
                    "markerfacecolor": "w",
                    "color": linecolor,  # simplify tests
                }
                ax.plot(invx(x2), invy(y2), **dot_kws)

        legend_artist = _get_patch_legend_artist(fill)
        common_kws = {**plot_kws, "linewidth": linewidth, "edgecolor": linecolor}
        self._configure_legend(ax, legend_artist, common_kws)
  981. def plot_points(
  982. self,
  983. aggregator,
  984. markers,
  985. linestyles,
  986. dodge,
  987. color,
  988. capsize,
  989. err_kws,
  990. plot_kws,
  991. ):
  992. agg_var = {"x": "y", "y": "x"}[self.orient]
  993. iter_vars = ["hue"]
  994. plot_kws = _normalize_kwargs(plot_kws, mpl.lines.Line2D)
  995. plot_kws.setdefault("linewidth", mpl.rcParams["lines.linewidth"] * 1.8)
  996. plot_kws.setdefault("markeredgewidth", plot_kws["linewidth"] * 0.75)
  997. plot_kws.setdefault("markersize", plot_kws["linewidth"] * np.sqrt(2 * np.pi))
  998. markers = self._map_prop_with_hue("marker", markers, "o", plot_kws)
  999. linestyles = self._map_prop_with_hue("linestyle", linestyles, "-", plot_kws)
  1000. base_positions = self.var_levels[self.orient]
  1001. if self.var_types[self.orient] == "categorical":
  1002. min_cat_val = int(self.comp_data[self.orient].min())
  1003. max_cat_val = int(self.comp_data[self.orient].max())
  1004. base_positions = [i for i in range(min_cat_val, max_cat_val + 1)]
  1005. n_hue_levels = 0 if self._hue_map.levels is None else len(self._hue_map.levels)
  1006. if dodge is True:
  1007. dodge = .025 * n_hue_levels
  1008. ax = self.ax
  1009. for sub_vars, sub_data in self.iter_data(iter_vars,
  1010. from_comp_data=True,
  1011. allow_empty=True):
  1012. ax = self._get_axes(sub_vars)
  1013. ori_axis = getattr(ax, f"{self.orient}axis")
  1014. transform, _ = _get_transform_functions(ax, self.orient)
  1015. positions = transform(ori_axis.convert_units(base_positions))
  1016. agg_data = sub_data if sub_data.empty else (
  1017. sub_data
  1018. .groupby(self.orient)
  1019. .apply(aggregator, agg_var)
  1020. .reindex(pd.Index(positions, name=self.orient))
  1021. .reset_index()
  1022. )
  1023. if dodge:
  1024. hue_idx = self._hue_map.levels.index(sub_vars["hue"])
  1025. step_size = dodge / (n_hue_levels - 1)
  1026. offset = -dodge / 2 + step_size * hue_idx
  1027. agg_data[self.orient] += offset * self._native_width
  1028. self._invert_scale(ax, agg_data)
  1029. sub_kws = plot_kws.copy()
  1030. sub_kws.update(
  1031. marker=markers[sub_vars.get("hue")],
  1032. linestyle=linestyles[sub_vars.get("hue")],
  1033. color=self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color,
  1034. )
  1035. line, = ax.plot(agg_data["x"], agg_data["y"], **sub_kws)
  1036. sub_err_kws = err_kws.copy()
  1037. line_props = line.properties()
  1038. for prop in ["color", "linewidth", "alpha", "zorder"]:
  1039. sub_err_kws.setdefault(prop, line_props[prop])
  1040. if aggregator.error_method is not None:
  1041. self.plot_errorbars(ax, agg_data, capsize, sub_err_kws)
  1042. legend_artist = partial(mpl.lines.Line2D, [], [])
  1043. semantic_kws = {"hue": {"marker": markers, "linestyle": linestyles}}
  1044. self._configure_legend(ax, legend_artist, sub_kws, semantic_kws)
  1045. def plot_bars(
  1046. self,
  1047. aggregator,
  1048. dodge,
  1049. gap,
  1050. width,
  1051. fill,
  1052. color,
  1053. capsize,
  1054. err_kws,
  1055. plot_kws,
  1056. ):
  1057. agg_var = {"x": "y", "y": "x"}[self.orient]
  1058. iter_vars = ["hue"]
  1059. ax = self.ax
  1060. if self._hue_map.levels is None:
  1061. dodge = False
  1062. if dodge and capsize is not None:
  1063. capsize = capsize / len(self._hue_map.levels)
  1064. if not fill:
  1065. plot_kws.setdefault("linewidth", 1.5 * mpl.rcParams["lines.linewidth"])
  1066. err_kws.setdefault("linewidth", 1.5 * mpl.rcParams["lines.linewidth"])
  1067. for sub_vars, sub_data in self.iter_data(iter_vars,
  1068. from_comp_data=True,
  1069. allow_empty=True):
  1070. ax = self._get_axes(sub_vars)
  1071. agg_data = sub_data if sub_data.empty else (
  1072. sub_data
  1073. .groupby(self.orient)
  1074. .apply(aggregator, agg_var)
  1075. .reset_index()
  1076. )
  1077. agg_data["width"] = width * self._native_width
  1078. if dodge:
  1079. self._dodge(sub_vars, agg_data)
  1080. if gap:
  1081. agg_data["width"] *= 1 - gap
  1082. agg_data["edge"] = agg_data[self.orient] - agg_data["width"] / 2
  1083. self._invert_scale(ax, agg_data)
  1084. if self.orient == "x":
  1085. bar_func = ax.bar
  1086. kws = dict(
  1087. x=agg_data["edge"], height=agg_data["y"], width=agg_data["width"]
  1088. )
  1089. else:
  1090. bar_func = ax.barh
  1091. kws = dict(
  1092. y=agg_data["edge"], width=agg_data["x"], height=agg_data["width"]
  1093. )
  1094. main_color = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
  1095. # Set both color and facecolor for property cycle logic
  1096. kws["align"] = "edge"
  1097. if fill:
  1098. kws.update(color=main_color, facecolor=main_color)
  1099. else:
  1100. kws.update(color=main_color, edgecolor=main_color, facecolor="none")
  1101. bar_func(**{**kws, **plot_kws})
  1102. if aggregator.error_method is not None:
  1103. self.plot_errorbars(
  1104. ax, agg_data, capsize,
  1105. {"color": ".26" if fill else main_color, **err_kws}
  1106. )
  1107. legend_artist = _get_patch_legend_artist(fill)
  1108. self._configure_legend(ax, legend_artist, plot_kws)
  1109. def plot_errorbars(self, ax, data, capsize, err_kws):
  1110. var = {"x": "y", "y": "x"}[self.orient]
  1111. for row in data.to_dict("records"):
  1112. row = dict(row)
  1113. pos = np.array([row[self.orient], row[self.orient]])
  1114. val = np.array([row[f"{var}min"], row[f"{var}max"]])
  1115. if capsize:
  1116. cw = capsize * self._native_width / 2
  1117. scl, inv = _get_transform_functions(ax, self.orient)
  1118. cap = inv(scl(pos[0]) - cw), inv(scl(pos[1]) + cw)
  1119. pos = np.concatenate([
  1120. [*cap, np.nan], pos, [np.nan, *cap]
  1121. ])
  1122. val = np.concatenate([
  1123. [val[0], val[0], np.nan], val, [np.nan, val[-1], val[-1]],
  1124. ])
  1125. if self.orient == "x":
  1126. args = pos, val
  1127. else:
  1128. args = val, pos
  1129. ax.plot(*args, **err_kws)
  1130. class _CategoricalAggPlotter(_CategoricalPlotter):
  1131. flat_structure = {"x": "@index", "y": "@values"}
# Reusable docstring fragments for the categorical plotting functions.
# Each value is inserted by name into a function docstring template through
# ``.format(**_categorical_docs)``, so the keys must match the ``{placeholder}``
# names used in those templates.
_categorical_docs = dict(

    # Shared narrative docs
    categorical_narrative=dedent("""\
See the :ref:`tutorial <categorical_tutorial>` for more information.
.. note::
By default, this function treats one of the variables as categorical
and draws data at ordinal positions (0, 1, ... n) on the relevant axis.
As of version 0.13.0, this can be disabled by setting `native_scale=True`.
"""),

    # Shared function parameters
    input_params=dedent("""\
x, y, hue : names of variables in `data` or vector data
Inputs for plotting long-form data. See examples for interpretation.\
"""),
    categorical_data=dedent("""\
data : DataFrame, Series, dict, array, or list of arrays
Dataset for plotting. If `x` and `y` are absent, this is
interpreted as wide-form. Otherwise it is expected to be long-form.\
"""),
    order_vars=dedent("""\
order, hue_order : lists of strings
Order to plot the categorical levels in; otherwise the levels are
inferred from the data objects.\
"""),
    stat_api_params=dedent("""\
estimator : string or callable that maps vector -> scalar
Statistical function to estimate within each categorical bin.
errorbar : string, (string, number) tuple, callable or None
Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval, or None to hide errorbar.
.. versionadded:: v0.12.0
n_boot : int
Number of bootstrap samples used to compute confidence intervals.
units : name of variable in `data` or vector data
Identifier of sampling units; used by the errorbar function to
perform a multilevel bootstrap and account for repeated measures
seed : int, `numpy.random.Generator`, or `numpy.random.RandomState`
Seed or random number generator for reproducible bootstrapping.\
"""),
    ci=dedent("""\
ci : float
Level of the confidence interval to show, in [0, 100].
.. deprecated:: v0.12.0
Use `errorbar=("ci", ...)`.\
"""),
    orient=dedent("""\
orient : "v" | "h" | "x" | "y"
Orientation of the plot (vertical or horizontal). This is usually
inferred based on the type of the input variables, but it can be used
to resolve ambiguity when both `x` and `y` are numeric or when
plotting wide-form data.
.. versionchanged:: v0.13.0
Added 'x'/'y' as options, equivalent to 'v'/'h'.\
"""),
    color=dedent("""\
color : matplotlib color
Single color for the elements in the plot.\
"""),
    palette=dedent("""\
palette : palette name, list, dict, or :class:`matplotlib.colors.Colormap`
Color palette that maps the hue variable. If the palette is a dictionary,
keys should be names of levels and values should be matplotlib colors.
The type/value will sometimes force a qualitative/quantitative mapping.\
"""),
    hue_norm=dedent("""\
hue_norm : tuple or :class:`matplotlib.colors.Normalize` object
Normalization in data units for colormap applied to the `hue`
variable when it is numeric. Not relevant if `hue` is categorical.
.. versionadded:: v0.12.0\
"""),
    saturation=dedent("""\
saturation : float
Proportion of the original saturation to draw fill colors in. Large
patches often look better with desaturated colors, but set this to
`1` if you want the colors to perfectly match the input values.\
"""),
    capsize=dedent("""\
capsize : float
Width of the "caps" on error bars, relative to bar spacing.\
"""),
    errcolor=dedent("""\
errcolor : matplotlib color
Color used for the error bar lines.
.. deprecated:: 0.13.0
Use `err_kws={'color': ...}`.\
"""),
    errwidth=dedent("""\
errwidth : float
Thickness of error bar lines (and caps), in points.
.. deprecated:: 0.13.0
Use `err_kws={'linewidth': ...}`.\
"""),
    fill=dedent("""\
fill : bool
If True, use a solid patch. Otherwise, draw as line art.
.. versionadded:: v0.13.0\
"""),
    gap=dedent("""\
gap : float
Shrink on the orient axis by this factor to add a gap between dodged elements.
.. versionadded:: 0.13.0\
"""),
    width=dedent("""\
width : float
Width allotted to each element on the orient axis. When `native_scale=True`,
it is relative to the minimum distance between two values in the native scale.\
"""),
    dodge=dedent("""\
dodge : "auto" or bool
When hue mapping is used, whether elements should be narrowed and shifted along
the orient axis to eliminate overlap. If `"auto"`, set to `True` when the
orient variable is crossed with the categorical variable or `False` otherwise.
.. versionchanged:: 0.13.0
Added `"auto"` mode as a new default.\
"""),
    linewidth=dedent("""\
linewidth : float
Width of the lines that frame the plot elements.\
"""),
    linecolor=dedent("""\
linecolor : color
Color to use for line elements, when `fill` is True.
.. versionadded:: v0.13.0\
"""),
    log_scale=dedent("""\
log_scale : bool or number, or pair of bools or numbers
Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When `None` or `False`, seaborn defers to the existing Axes scale.
.. versionadded:: v0.13.0\
"""),
    native_scale=dedent("""\
native_scale : bool
When True, numeric or datetime values on the categorical axis will maintain
their original scaling rather than being converted to fixed indices.
.. versionadded:: v0.13.0\
"""),
    formatter=dedent("""\
formatter : callable
Function for converting categorical data into strings. Affects both grouping
and tick labels.
.. versionadded:: v0.13.0\
"""),
    legend=dedent("""\
legend : "auto", "brief", "full", or False
How to draw the legend. If "brief", numeric `hue` and `size`
variables will be represented with a sample of evenly spaced values.
If "full", every group will get an entry in the legend. If "auto",
choose between brief or full representation based on number of levels.
If `False`, no legend data is added and no legend is drawn.
.. versionadded:: v0.13.0\
"""),
    err_kws=dedent("""\
err_kws : dict
Parameters of :class:`matplotlib.lines.Line2D`, for the error bar artists.
.. versionadded:: v0.13.0\
"""),
    ax_in=dedent("""\
ax : matplotlib Axes
Axes object to draw the plot onto, otherwise uses the current Axes.\
"""),
    ax_out=dedent("""\
ax : matplotlib Axes
Returns the Axes object with the plot drawn onto it.\
"""),

    # Shared see also
    boxplot=dedent("""\
boxplot : A traditional box-and-whisker plot with a similar API.\
"""),
    violinplot=dedent("""\
violinplot : A combination of boxplot and kernel density estimation.\
"""),
    stripplot=dedent("""\
stripplot : A scatterplot where one variable is categorical. Can be used
in conjunction with other plots to show each observation.\
"""),
    swarmplot=dedent("""\
swarmplot : A categorical scatterplot where the points do not overlap. Can
be used with other plots to show each observation.\
"""),
    barplot=dedent("""\
barplot : Show point estimates and confidence intervals using bars.\
"""),
    countplot=dedent("""\
countplot : Show the counts of observations in each categorical bin.\
"""),
    pointplot=dedent("""\
pointplot : Show point estimates and confidence intervals using dots.\
"""),
    catplot=dedent("""\
catplot : Combine a categorical plot with a :class:`FacetGrid`.\
"""),
    boxenplot=dedent("""\
boxenplot : An enhanced boxplot for larger datasets.\
"""),

)

# Add the fragments imported from seaborn.axisgrid under their own keys
_categorical_docs.update(_facet_docs)
  1331. def boxplot(
  1332. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1333. orient=None, color=None, palette=None, saturation=.75, fill=True,
  1334. dodge="auto", width=.8, gap=0, whis=1.5, linecolor="auto", linewidth=None,
  1335. fliersize=None, hue_norm=None, native_scale=False, log_scale=None, formatter=None,
  1336. legend="auto", ax=None, **kwargs
  1337. ):
  1338. p = _CategoricalPlotter(
  1339. data=data,
  1340. variables=dict(x=x, y=y, hue=hue),
  1341. order=order,
  1342. orient=orient,
  1343. color=color,
  1344. legend=legend,
  1345. )
  1346. if ax is None:
  1347. ax = plt.gca()
  1348. if p.plot_data.empty:
  1349. return ax
  1350. if dodge == "auto":
  1351. # Needs to be before scale_categorical changes the coordinate series dtype
  1352. dodge = p._dodge_needed()
  1353. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1354. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1355. p._attach(ax, log_scale=log_scale)
  1356. # Deprecations to remove in v0.14.0.
  1357. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1358. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1359. saturation = saturation if fill else 1
  1360. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  1361. color = _default_color(
  1362. ax.fill_between, hue, color,
  1363. {k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
  1364. saturation=saturation,
  1365. )
  1366. linecolor = p._complement_color(linecolor, color, p._hue_map)
  1367. p.plot_boxes(
  1368. width=width,
  1369. dodge=dodge,
  1370. gap=gap,
  1371. fill=fill,
  1372. whis=whis,
  1373. color=color,
  1374. linecolor=linecolor,
  1375. linewidth=linewidth,
  1376. fliersize=fliersize,
  1377. plot_kws=kwargs,
  1378. )
  1379. p._add_axis_labels(ax)
  1380. p._adjust_cat_axis(ax, axis=p.orient)
  1381. return ax
  1382. boxplot.__doc__ = dedent("""\
  1383. Draw a box plot to show distributions with respect to categories.
  1384. A box plot (or box-and-whisker plot) shows the distribution of quantitative
  1385. data in a way that facilitates comparisons between variables or across
  1386. levels of a categorical variable. The box shows the quartiles of the
  1387. dataset while the whiskers extend to show the rest of the distribution,
  1388. except for points that are determined to be "outliers" using a method
  1389. that is a function of the inter-quartile range.
  1390. {categorical_narrative}
  1391. Parameters
  1392. ----------
  1393. {categorical_data}
  1394. {input_params}
  1395. {order_vars}
  1396. {orient}
  1397. {color}
  1398. {palette}
  1399. {saturation}
  1400. {fill}
  1401. {dodge}
  1402. {width}
  1403. {gap}
  1404. whis : float or pair of floats
  1405. Paramater that controls whisker length. If scalar, whiskers are drawn
  1406. to the farthest datapoint within *whis * IQR* from the nearest hinge.
  1407. If a tuple, it is interpreted as percentiles that whiskers represent.
  1408. {linecolor}
  1409. {linewidth}
  1410. fliersize : float
  1411. Size of the markers used to indicate outlier observations.
  1412. {hue_norm}
  1413. {log_scale}
  1414. {native_scale}
  1415. {formatter}
  1416. {legend}
  1417. {ax_in}
  1418. kwargs : key, value mappings
  1419. Other keyword arguments are passed through to
  1420. :meth:`matplotlib.axes.Axes.boxplot`.
  1421. Returns
  1422. -------
  1423. {ax_out}
  1424. See Also
  1425. --------
  1426. {violinplot}
  1427. {stripplot}
  1428. {swarmplot}
  1429. {catplot}
  1430. Examples
  1431. --------
  1432. .. include:: ../docstrings/boxplot.rst
  1433. """).format(**_categorical_docs)
  1434. def violinplot(
  1435. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1436. orient=None, color=None, palette=None, saturation=.75, fill=True,
  1437. inner="box", split=False, width=.8, dodge="auto", gap=0,
  1438. linewidth=None, linecolor="auto", cut=2, gridsize=100,
  1439. bw_method="scott", bw_adjust=1, density_norm="area", common_norm=False,
  1440. hue_norm=None, formatter=None, log_scale=None, native_scale=False,
  1441. legend="auto", scale=deprecated, scale_hue=deprecated, bw=deprecated,
  1442. inner_kws=None, ax=None, **kwargs,
  1443. ):
  1444. p = _CategoricalPlotter(
  1445. data=data,
  1446. variables=dict(x=x, y=y, hue=hue),
  1447. order=order,
  1448. orient=orient,
  1449. color=color,
  1450. legend=legend,
  1451. )
  1452. if ax is None:
  1453. ax = plt.gca()
  1454. if p.plot_data.empty:
  1455. return ax
  1456. if dodge == "auto":
  1457. # Needs to be before scale_categorical changes the coordinate series dtype
  1458. dodge = p._dodge_needed()
  1459. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1460. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1461. p._attach(ax, log_scale=log_scale)
  1462. # Deprecations to remove in v0.14.0.
  1463. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1464. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1465. saturation = saturation if fill else 1
  1466. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  1467. color = _default_color(
  1468. ax.fill_between, hue, color,
  1469. {k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
  1470. saturation=saturation,
  1471. )
  1472. linecolor = p._complement_color(linecolor, color, p._hue_map)
  1473. density_norm, common_norm = p._violin_scale_backcompat(
  1474. scale, scale_hue, density_norm, common_norm,
  1475. )
  1476. bw_method = p._violin_bw_backcompat(bw, bw_method)
  1477. kde_kws = dict(cut=cut, gridsize=gridsize, bw_method=bw_method, bw_adjust=bw_adjust)
  1478. inner_kws = {} if inner_kws is None else inner_kws.copy()
  1479. p.plot_violins(
  1480. width=width,
  1481. dodge=dodge,
  1482. gap=gap,
  1483. split=split,
  1484. color=color,
  1485. fill=fill,
  1486. linecolor=linecolor,
  1487. linewidth=linewidth,
  1488. inner=inner,
  1489. density_norm=density_norm,
  1490. common_norm=common_norm,
  1491. kde_kws=kde_kws,
  1492. inner_kws=inner_kws,
  1493. plot_kws=kwargs,
  1494. )
  1495. p._add_axis_labels(ax)
  1496. p._adjust_cat_axis(ax, axis=p.orient)
  1497. return ax
  1498. violinplot.__doc__ = dedent("""\
  1499. Draw a patch representing a KDE and add observations or box plot statistics.
  1500. A violin plot plays a similar role as a box-and-whisker plot. It shows the
  1501. distribution of data points after grouping by one (or more) variables.
  1502. Unlike a box plot, each violin is drawn using a kernel density estimate
  1503. of the underlying distribution.
  1504. {categorical_narrative}
  1505. Parameters
  1506. ----------
  1507. {categorical_data}
  1508. {input_params}
  1509. {order_vars}
  1510. {orient}
  1511. {color}
  1512. {palette}
  1513. {saturation}
  1514. {fill}
  1515. inner : {{"box", "quart", "point", "stick", None}}
  1516. Representation of the data in the violin interior. One of the following:
  1517. - `"box"`: draw a miniature box-and-whisker plot
  1518. - `"quart"`: show the quartiles of the data
  1519. - `"point"` or `"stick"`: show each observation
  1520. split : bool
  1521. Show an un-mirrored distribution, alternating sides when using `hue`.
  1522. .. versionchanged:: v0.13.0
  1523. Previously, this option required a `hue` variable with exactly two levels.
  1524. {width}
  1525. {dodge}
  1526. {gap}
  1527. {linewidth}
  1528. {linecolor}
  1529. cut : float
  1530. Distance, in units of bandwidth, to extend the density past extreme
  1531. datapoints. Set to 0 to limit the violin within the data range.
  1532. gridsize : int
  1533. Number of points in the discrete grid used to evaluate the KDE.
  1534. bw_method : {{"scott", "silverman", float}}
  1535. Either the name of a reference rule or the scale factor to use when
  1536. computing the kernel bandwidth. The actual kernel size will be
  1537. determined by multiplying the scale factor by the standard deviation of
  1538. the data within each group.
  1539. .. versionadded:: v0.13.0
  1540. bw_adjust : float
  1541. Factor that scales the bandwidth to use more or less smoothing.
  1542. .. versionadded:: v0.13.0
  1543. density_norm : {{"area", "count", "width"}}
  1544. Method that normalizes each density to determine the violin's width.
  1545. If `area`, each violin will have the same area. If `count`, the width
  1546. will be proportional to the number of observations. If `width`, each
  1547. violin will have the same width.
  1548. .. versionadded:: v0.13.0
  1549. common_norm : bool
  1550. When `True`, normalize the density across all violins.
  1551. .. versionadded:: v0.13.0
  1552. {hue_norm}
  1553. {formatter}
  1554. {log_scale}
  1555. {native_scale}
  1556. {legend}
  1557. scale : {{"area", "count", "width"}}
  1558. .. deprecated:: v0.13.0
  1559. See `density_norm`.
  1560. scale_hue : bool
  1561. .. deprecated:: v0.13.0
  1562. See `common_norm`.
  1563. bw : {{'scott', 'silverman', float}}
  1564. .. deprecated:: v0.13.0
  1565. See `bw_method` and `bw_adjust`.
  1566. inner_kws : dict of key, value mappings
  1567. Keyword arguments for the "inner" plot, passed to one of:
  1568. - :class:`matplotlib.collections.LineCollection` (with `inner="stick"`)
  1569. - :meth:`matplotlib.axes.Axes.scatter` (with `inner="point"`)
  1570. - :meth:`matplotlib.axes.Axes.plot` (with `inner="quart"` or `inner="box"`)
  1571. Additionally, with `inner="box"`, the keywords `box_width`, `whis_width`,
  1572. and `marker` receive special handling for the components of the "box" plot.
  1573. .. versionadded:: v0.13.0
  1574. {ax_in}
  1575. kwargs : key, value mappings
  1576. Keyword arguments for the violin patches, passed through to
  1577. :meth:`matplotlib.axes.Axes.fill_between`.
  1578. Returns
  1579. -------
  1580. {ax_out}
  1581. See Also
  1582. --------
  1583. {boxplot}
  1584. {stripplot}
  1585. {swarmplot}
  1586. {catplot}
  1587. Examples
  1588. --------
  1589. .. include:: ../docstrings/violinplot.rst
  1590. """).format(**_categorical_docs)
  1591. def boxenplot(
  1592. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1593. orient=None, color=None, palette=None, saturation=.75, fill=True,
  1594. dodge="auto", width=.8, gap=0, linewidth=None, linecolor=None,
  1595. width_method="exponential", k_depth="tukey", outlier_prop=0.007, trust_alpha=0.05,
  1596. showfliers=True, hue_norm=None, log_scale=None, native_scale=False, formatter=None,
  1597. legend="auto", scale=deprecated, box_kws=None, flier_kws=None, line_kws=None,
  1598. ax=None, **kwargs,
  1599. ):
  1600. p = _CategoricalPlotter(
  1601. data=data,
  1602. variables=dict(x=x, y=y, hue=hue),
  1603. order=order,
  1604. orient=orient,
  1605. color=color,
  1606. legend=legend,
  1607. )
  1608. if ax is None:
  1609. ax = plt.gca()
  1610. if p.plot_data.empty:
  1611. return ax
  1612. if dodge == "auto":
  1613. # Needs to be before scale_categorical changes the coordinate series dtype
  1614. dodge = p._dodge_needed()
  1615. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1616. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1617. p._attach(ax, log_scale=log_scale)
  1618. # Deprecations to remove in v0.14.0.
  1619. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1620. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1621. # Longer-term deprecations
  1622. width_method = p._boxen_scale_backcompat(scale, width_method)
  1623. saturation = saturation if fill else 1
  1624. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  1625. color = _default_color(
  1626. ax.fill_between, hue, color,
  1627. {}, # TODO how to get default color?
  1628. # {k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
  1629. saturation=saturation,
  1630. )
  1631. linecolor = p._complement_color(linecolor, color, p._hue_map)
  1632. p.plot_boxens(
  1633. width=width,
  1634. dodge=dodge,
  1635. gap=gap,
  1636. fill=fill,
  1637. color=color,
  1638. linecolor=linecolor,
  1639. linewidth=linewidth,
  1640. width_method=width_method,
  1641. k_depth=k_depth,
  1642. outlier_prop=outlier_prop,
  1643. trust_alpha=trust_alpha,
  1644. showfliers=showfliers,
  1645. box_kws=box_kws,
  1646. flier_kws=flier_kws,
  1647. line_kws=line_kws,
  1648. plot_kws=kwargs,
  1649. )
  1650. p._add_axis_labels(ax)
  1651. p._adjust_cat_axis(ax, axis=p.orient)
  1652. return ax
  1653. boxenplot.__doc__ = dedent("""\
  1654. Draw an enhanced box plot for larger datasets.
  1655. This style of plot was originally named a "letter value" plot because it
  1656. shows a large number of quantiles that are defined as "letter values". It
  1657. is similar to a box plot in plotting a nonparametric representation of a
  1658. distribution in which all features correspond to actual observations. By
  1659. plotting more quantiles, it provides more information about the shape of
  1660. the distribution, particularly in the tails.
  1661. {categorical_narrative}
  1662. Parameters
  1663. ----------
  1664. {categorical_data}
  1665. {input_params}
  1666. {order_vars}
  1667. {orient}
  1668. {color}
  1669. {palette}
  1670. {saturation}
  1671. {fill}
  1672. {dodge}
  1673. {width}
  1674. {gap}
  1675. {linewidth}
  1676. {linecolor}
  1677. width_method : {{"exponential", "linear", "area"}}
  1678. Method to use for the width of the letter value boxes:
  1679. - `"exponential"`: Represent the corresponding percentile
  1680. - `"linear"`: Decrease by a constant amount for each box
  1681. - `"area"`: Represent the density of data points in that box
  1682. k_depth : {{"tukey", "proportion", "trustworthy", "full"}} or int
  1683. The number of levels to compute and draw in each tail:
  1684. - `"tukey"`: Use log2(n) - 3 levels, covering similar range as boxplot whiskers
  1685. - `"proportion"`: Leave approximately `outlier_prop` fliers
  1686. - `"trustworthy"`: Extend to level with confidence of at least `trust_alpha`
  1687. - `"full"`: Use log2(n) + 1 levels and extend to most extreme points
  1688. outlier_prop : float
  1689. Proportion of data expected to be outliers; used when `k_depth="proportion"`.
  1690. trust_alpha : float
  1691. Confidence threshold for most extreme level; used when `k_depth="trustworthy"`.
  1692. showfliers : bool
  1693. If False, suppress the plotting of outliers.
  1694. {hue_norm}
  1695. {log_scale}
  1696. {native_scale}
  1697. {formatter}
  1698. {legend}
  1699. box_kws : dict
  1700. Keyword arguments for the box artists; passed to
  1701. :class:`matplotlib.patches.Rectangle`.
  1702. .. versionadded:: v0.12.0
  1703. line_kws : dict
  1704. Keyword arguments for the line denoting the median; passed to
  1705. :meth:`matplotlib.axes.Axes.plot`.
  1706. .. versionadded:: v0.12.0
  1707. flier_kws : dict
  1708. Keyword arguments for the scatter denoting the outlier observations;
  1709. passed to :meth:`matplotlib.axes.Axes.scatter`.
  1710. .. versionadded:: v0.12.0
  1711. {ax_in}
  1712. kwargs : key, value mappings
  1713. Other keyword arguments are passed to :class:`matplotlib.patches.Rectangle`,
  1714. superseded by those in `box_kws`.
  1715. Returns
  1716. -------
  1717. {ax_out}
  1718. See Also
  1719. --------
  1720. {violinplot}
  1721. {boxplot}
  1722. {catplot}
  1723. Notes
  1724. -----
  1725. For a more extensive explanation, you can read the paper that introduced the plot:
  1726. https://vita.had.co.nz/papers/letter-value-plot.html
  1727. Examples
  1728. --------
  1729. .. include:: ../docstrings/boxenplot.rst
  1730. """).format(**_categorical_docs)
  1731. def stripplot(
  1732. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1733. jitter=True, dodge=False, orient=None, color=None, palette=None,
  1734. size=5, edgecolor=default, linewidth=0,
  1735. hue_norm=None, log_scale=None, native_scale=False, formatter=None, legend="auto",
  1736. ax=None, **kwargs
  1737. ):
  1738. p = _CategoricalPlotter(
  1739. data=data,
  1740. variables=dict(x=x, y=y, hue=hue),
  1741. order=order,
  1742. orient=orient,
  1743. color=color,
  1744. legend=legend,
  1745. )
  1746. if ax is None:
  1747. ax = plt.gca()
  1748. if p.plot_data.empty:
  1749. return ax
  1750. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1751. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1752. p._attach(ax, log_scale=log_scale)
  1753. # Deprecations to remove in v0.14.0.
  1754. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1755. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1756. p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
  1757. color = _default_color(ax.scatter, hue, color, kwargs)
  1758. edgecolor = p._complement_color(edgecolor, color, p._hue_map)
  1759. kwargs.setdefault("zorder", 3)
  1760. size = kwargs.get("s", size)
  1761. kwargs.update(
  1762. s=size ** 2,
  1763. edgecolor=edgecolor,
  1764. linewidth=linewidth,
  1765. )
  1766. p.plot_strips(
  1767. jitter=jitter,
  1768. dodge=dodge,
  1769. color=color,
  1770. plot_kws=kwargs,
  1771. )
  1772. # XXX this happens inside a plotting method in the distribution plots
  1773. # but maybe it's better out here? Alternatively, we have an open issue
  1774. # suggesting that _attach could add default axes labels, which seems smart.
  1775. p._add_axis_labels(ax)
  1776. p._adjust_cat_axis(ax, axis=p.orient)
  1777. return ax
  1778. stripplot.__doc__ = dedent("""\
  1779. Draw a categorical scatterplot using jitter to reduce overplotting.
  1780. A strip plot can be drawn on its own, but it is also a good complement
  1781. to a box or violin plot in cases where you want to show all observations
  1782. along with some representation of the underlying distribution.
  1783. {categorical_narrative}
  1784. Parameters
  1785. ----------
  1786. {categorical_data}
  1787. {input_params}
  1788. {order_vars}
  1789. jitter : float, `True`/`1` is special-cased
  1790. Amount of jitter (only along the categorical axis) to apply. This
  1791. can be useful when you have many points and they overlap, so that
  1792. it is easier to see the distribution. You can specify the amount
  1793. of jitter (half the width of the uniform random variable support),
  1794. or use `True` for a good default.
  1795. dodge : bool
  1796. When a `hue` variable is assigned, setting this to `True` will
  1797. separate the strips for different hue levels along the categorical
  1798. axis and narrow the amount of space allotted to each strip. Otherwise,
  1799. the points for each level will be plotted in the same strip.
  1800. {orient}
  1801. {color}
  1802. {palette}
  1803. size : float
  1804. Radius of the markers, in points.
  1805. edgecolor : matplotlib color, "gray" is special-cased
  1806. Color of the lines around each point. If you pass `"gray"`, the
  1807. brightness is determined by the color palette used for the body
  1808. of the points. Note that `stripplot` has `linewidth=0` by default,
  1809. so edge colors are only visible with nonzero line width.
  1810. {linewidth}
  1811. {hue_norm}
  1812. {log_scale}
  1813. {native_scale}
  1814. {formatter}
  1815. {legend}
  1816. {ax_in}
  1817. kwargs : key, value mappings
  1818. Other keyword arguments are passed through to
  1819. :meth:`matplotlib.axes.Axes.scatter`.
  1820. Returns
  1821. -------
  1822. {ax_out}
  1823. See Also
  1824. --------
  1825. {swarmplot}
  1826. {boxplot}
  1827. {violinplot}
  1828. {catplot}
  1829. Examples
  1830. --------
  1831. .. include:: ../docstrings/stripplot.rst
  1832. """).format(**_categorical_docs)
  1833. def swarmplot(
  1834. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1835. dodge=False, orient=None, color=None, palette=None,
  1836. size=5, edgecolor=None, linewidth=0, hue_norm=None, log_scale=None,
  1837. native_scale=False, formatter=None, legend="auto", warn_thresh=.05,
  1838. ax=None, **kwargs
  1839. ):
  1840. p = _CategoricalPlotter(
  1841. data=data,
  1842. variables=dict(x=x, y=y, hue=hue),
  1843. order=order,
  1844. orient=orient,
  1845. color=color,
  1846. legend=legend,
  1847. )
  1848. if ax is None:
  1849. ax = plt.gca()
  1850. if p.plot_data.empty:
  1851. return ax
  1852. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1853. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1854. p._attach(ax, log_scale=log_scale)
  1855. if not p.has_xy_data:
  1856. return ax
  1857. # Deprecations to remove in v0.14.0.
  1858. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1859. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1860. p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
  1861. color = _default_color(ax.scatter, hue, color, kwargs)
  1862. edgecolor = p._complement_color(edgecolor, color, p._hue_map)
  1863. kwargs.setdefault("zorder", 3)
  1864. size = kwargs.get("s", size)
  1865. if linewidth is None:
  1866. linewidth = size / 10
  1867. kwargs.update(dict(
  1868. s=size ** 2,
  1869. edgecolor=edgecolor,
  1870. linewidth=linewidth,
  1871. ))
  1872. p.plot_swarms(
  1873. dodge=dodge,
  1874. color=color,
  1875. warn_thresh=warn_thresh,
  1876. plot_kws=kwargs,
  1877. )
  1878. p._add_axis_labels(ax)
  1879. p._adjust_cat_axis(ax, axis=p.orient)
  1880. return ax
  1881. swarmplot.__doc__ = dedent("""\
  1882. Draw a categorical scatterplot with points adjusted to be non-overlapping.
  1883. This function is similar to :func:`stripplot`, but the points are adjusted
  1884. (only along the categorical axis) so that they don't overlap. This gives a
  1885. better representation of the distribution of values, but it does not scale
  1886. well to large numbers of observations. This style of plot is sometimes
  1887. called a "beeswarm".
  1888. A swarm plot can be drawn on its own, but it is also a good complement
  1889. to a box or violin plot in cases where you want to show all observations
  1890. along with some representation of the underlying distribution.
  1891. {categorical_narrative}
  1892. Parameters
  1893. ----------
  1894. {categorical_data}
  1895. {input_params}
  1896. {order_vars}
  1897. dodge : bool
  1898. When a `hue` variable is assigned, setting this to `True` will
  1899. separate the swarms for different hue levels along the categorical
  1900. axis and narrow the amount of space allotted to each swarm. Otherwise,
  1901. the points for each level will be plotted in the same swarm.
  1902. {orient}
  1903. {color}
  1904. {palette}
  1905. size : float
  1906. Radius of the markers, in points.
  1907. edgecolor : matplotlib color, "gray" is special-cased
  1908. Color of the lines around each point. If you pass `"gray"`, the
  1909. brightness is determined by the color palette used for the body
  1910. of the points.
  1911. {linewidth}
  1912. {log_scale}
  1913. {native_scale}
  1914. {formatter}
  1915. {legend}
  1916. {ax_in}
  1917. kwargs : key, value mappings
  1918. Other keyword arguments are passed through to
  1919. :meth:`matplotlib.axes.Axes.scatter`.
  1920. Returns
  1921. -------
  1922. {ax_out}
  1923. See Also
  1924. --------
  1925. {boxplot}
  1926. {violinplot}
  1927. {stripplot}
  1928. {catplot}
  1929. Examples
  1930. --------
  1931. .. include:: ../docstrings/swarmplot.rst
  1932. """).format(**_categorical_docs)
  1933. def barplot(
  1934. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  1935. estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None,
  1936. orient=None, color=None, palette=None, saturation=.75, fill=True, hue_norm=None,
  1937. width=.8, dodge="auto", gap=0, log_scale=None, native_scale=False, formatter=None,
  1938. legend="auto", capsize=0, err_kws=None,
  1939. ci=deprecated, errcolor=deprecated, errwidth=deprecated, ax=None, **kwargs,
  1940. ):
  1941. errorbar = utils._deprecate_ci(errorbar, ci)
  1942. # Be backwards compatible with len passed directly, which
  1943. # does not work in Series.agg (maybe a pandas bug?)
  1944. if estimator is len:
  1945. estimator = "size"
  1946. p = _CategoricalAggPlotter(
  1947. data=data,
  1948. variables=dict(x=x, y=y, hue=hue, units=units),
  1949. order=order,
  1950. orient=orient,
  1951. color=color,
  1952. legend=legend,
  1953. )
  1954. if ax is None:
  1955. ax = plt.gca()
  1956. if p.plot_data.empty:
  1957. return ax
  1958. if dodge == "auto":
  1959. # Needs to be before scale_categorical changes the coordinate series dtype
  1960. dodge = p._dodge_needed()
  1961. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  1962. p.scale_categorical(p.orient, order=order, formatter=formatter)
  1963. p._attach(ax, log_scale=log_scale)
  1964. # Deprecations to remove in v0.14.0.
  1965. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  1966. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  1967. saturation = saturation if fill else 1
  1968. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  1969. color = _default_color(ax.bar, hue, color, kwargs, saturation=saturation)
  1970. aggregator = EstimateAggregator(estimator, errorbar, n_boot=n_boot, seed=seed)
  1971. err_kws = {} if err_kws is None else _normalize_kwargs(err_kws, mpl.lines.Line2D)
  1972. # Deprecations to remove in v0.15.0.
  1973. err_kws, capsize = p._err_kws_backcompat(err_kws, errcolor, errwidth, capsize)
  1974. p.plot_bars(
  1975. aggregator=aggregator,
  1976. dodge=dodge,
  1977. width=width,
  1978. gap=gap,
  1979. color=color,
  1980. fill=fill,
  1981. capsize=capsize,
  1982. err_kws=err_kws,
  1983. plot_kws=kwargs,
  1984. )
  1985. p._add_axis_labels(ax)
  1986. p._adjust_cat_axis(ax, axis=p.orient)
  1987. return ax
  1988. barplot.__doc__ = dedent("""\
  1989. Show point estimates and errors as rectangular bars.
  1990. A bar plot represents an aggregate or statistical estimate for a numeric
  1991. variable with the height of each rectangle and indicates the uncertainty
  1992. around that estimate using an error bar. Bar plots include 0 in the
  1993. axis range, and they are a good choice when 0 is a meaningful value
  1994. for the variable to take.
  1995. {categorical_narrative}
  1996. Parameters
  1997. ----------
  1998. {categorical_data}
  1999. {input_params}
  2000. {order_vars}
  2001. {stat_api_params}
  2002. {orient}
  2003. {color}
  2004. {palette}
  2005. {saturation}
  2006. {fill}
  2007. {hue_norm}
  2008. {width}
  2009. {dodge}
  2010. {gap}
  2011. {log_scale}
  2012. {native_scale}
  2013. {formatter}
  2014. {legend}
  2015. {capsize}
  2016. {err_kws}
  2017. {ci}
  2018. {errcolor}
  2019. {errwidth}
  2020. {ax_in}
  2021. kwargs : key, value mappings
  2022. Other parameters are passed through to :class:`matplotlib.patches.Rectangle`.
  2023. Returns
  2024. -------
  2025. {ax_out}
  2026. See Also
  2027. --------
  2028. {countplot}
  2029. {pointplot}
  2030. {catplot}
  2031. Notes
  2032. -----
  2033. For datasets where 0 is not a meaningful value, a :func:`pointplot` will
  2034. allow you to focus on differences between levels of one or more categorical
  2035. variables.
  2036. It is also important to keep in mind that a bar plot shows only the mean (or
  2037. other aggregate) value, but it is often more informative to show the
  2038. distribution of values at each level of the categorical variables. In those
  2039. cases, approaches such as a :func:`boxplot` or :func:`violinplot` may be
  2040. more appropriate.
  2041. Examples
  2042. --------
  2043. .. include:: ../docstrings/barplot.rst
  2044. """).format(**_categorical_docs)
  2045. def pointplot(
  2046. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  2047. estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None,
  2048. color=None, palette=None, hue_norm=None, markers=default, linestyles=default,
  2049. dodge=False, log_scale=None, native_scale=False, orient=None, capsize=0,
  2050. formatter=None, legend="auto", err_kws=None,
  2051. ci=deprecated, errwidth=deprecated, join=deprecated, scale=deprecated,
  2052. ax=None,
  2053. **kwargs,
  2054. ):
  2055. errorbar = utils._deprecate_ci(errorbar, ci)
  2056. p = _CategoricalAggPlotter(
  2057. data=data,
  2058. variables=dict(x=x, y=y, hue=hue, units=units),
  2059. order=order,
  2060. orient=orient,
  2061. # Handle special backwards compatibility where pointplot originally
  2062. # did *not* default to multi-colored unless a palette was specified.
  2063. color="C0" if (color is None and palette is None) else color,
  2064. legend=legend,
  2065. )
  2066. if ax is None:
  2067. ax = plt.gca()
  2068. if p.plot_data.empty:
  2069. return ax
  2070. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  2071. p.scale_categorical(p.orient, order=order, formatter=formatter)
  2072. p._attach(ax, log_scale=log_scale)
  2073. # Deprecations to remove in v0.14.0.
  2074. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  2075. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  2076. p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
  2077. color = _default_color(ax.plot, hue, color, kwargs)
  2078. aggregator = EstimateAggregator(estimator, errorbar, n_boot=n_boot, seed=seed)
  2079. err_kws = {} if err_kws is None else _normalize_kwargs(err_kws, mpl.lines.Line2D)
  2080. # Deprecations to remove in v0.15.0.
  2081. p._point_kwargs_backcompat(scale, join, kwargs)
  2082. err_kws, capsize = p._err_kws_backcompat(err_kws, None, errwidth, capsize)
  2083. p.plot_points(
  2084. aggregator=aggregator,
  2085. markers=markers,
  2086. linestyles=linestyles,
  2087. dodge=dodge,
  2088. color=color,
  2089. capsize=capsize,
  2090. err_kws=err_kws,
  2091. plot_kws=kwargs,
  2092. )
  2093. p._add_axis_labels(ax)
  2094. p._adjust_cat_axis(ax, axis=p.orient)
  2095. return ax
  2096. pointplot.__doc__ = dedent("""\
  2097. Show point estimates and errors using lines with markers.
  2098. A point plot represents an estimate of central tendency for a numeric
  2099. variable by the position of the dot and provides some indication of the
  2100. uncertainty around that estimate using error bars.
  2101. Point plots can be more useful than bar plots for focusing comparisons
  2102. between different levels of one or more categorical variables. They are
  2103. particularly adept at showing interactions: how the relationship between
  2104. levels of one categorical variable changes across levels of a second
  2105. categorical variable. The lines that join each point from the same `hue`
  2106. level allow interactions to be judged by differences in slope, which is
  2107. easier for the eyes than comparing the heights of several groups of points
  2108. or bars.
  2109. {categorical_narrative}
  2110. Parameters
  2111. ----------
  2112. {categorical_data}
  2113. {input_params}
  2114. {order_vars}
  2115. {stat_api_params}
  2116. {color}
  2117. {palette}
  2118. markers : string or list of strings
  2119. Markers to use for each of the `hue` levels.
  2120. linestyles : string or list of strings
  2121. Line styles to use for each of the `hue` levels.
  2122. dodge : bool or float
  2123. Amount to separate the points for each level of the `hue` variable along
  2124. the categorical axis. Setting to `True` will apply a small default.
  2125. {log_scale}
  2126. {native_scale}
  2127. {orient}
  2128. {capsize}
  2129. {formatter}
  2130. {legend}
  2131. {err_kws}
  2132. {ci}
  2133. {errwidth}
  2134. join : bool
  2135. If `True`, connect point estimates with a line.
  2136. .. deprecated:: v0.13.0
  2137. Set `linestyle="none"` to remove the lines between the points.
  2138. scale : float
  2139. Scale factor for the plot elements.
  2140. .. deprecated:: v0.13.0
  2141. Control element sizes with :class:`matplotlib.lines.Line2D` parameters.
  2142. {ax_in}
  2143. kwargs : key, value mappings
  2144. Other parameters are passed through to :class:`matplotlib.lines.Line2D`.
  2145. .. versionadded:: v0.13.0
  2146. Returns
  2147. -------
  2148. {ax_out}
  2149. See Also
  2150. --------
  2151. {barplot}
  2152. {catplot}
  2153. Notes
  2154. -----
  2155. It is important to keep in mind that a point plot shows only the mean (or
  2156. other estimator) value, but in many cases it may be more informative to
  2157. show the distribution of values at each level of the categorical variables.
  2158. In that case, other approaches such as a box or violin plot may be more
  2159. appropriate.
  2160. Examples
  2161. --------
  2162. .. include:: ../docstrings/pointplot.rst
  2163. """).format(**_categorical_docs)
  2164. def countplot(
  2165. data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
  2166. orient=None, color=None, palette=None, saturation=.75, fill=True, hue_norm=None,
  2167. stat="count", width=.8, dodge="auto", gap=0, log_scale=None, native_scale=False,
  2168. formatter=None, legend="auto", ax=None, **kwargs
  2169. ):
  2170. if x is None and y is not None:
  2171. orient = "y"
  2172. x = 1 if list(y) else None
  2173. elif x is not None and y is None:
  2174. orient = "x"
  2175. y = 1 if list(x) else None
  2176. elif x is not None and y is not None:
  2177. raise TypeError("Cannot pass values for both `x` and `y`.")
  2178. p = _CategoricalAggPlotter(
  2179. data=data,
  2180. variables=dict(x=x, y=y, hue=hue),
  2181. order=order,
  2182. orient=orient,
  2183. color=color,
  2184. legend=legend,
  2185. )
  2186. if ax is None:
  2187. ax = plt.gca()
  2188. if p.plot_data.empty:
  2189. return ax
  2190. if dodge == "auto":
  2191. # Needs to be before scale_categorical changes the coordinate series dtype
  2192. dodge = p._dodge_needed()
  2193. if p.var_types.get(p.orient) == "categorical" or not native_scale:
  2194. p.scale_categorical(p.orient, order=order, formatter=formatter)
  2195. p._attach(ax, log_scale=log_scale)
  2196. # Deprecations to remove in v0.14.0.
  2197. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  2198. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
  2199. saturation = saturation if fill else 1
  2200. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  2201. color = _default_color(ax.bar, hue, color, kwargs, saturation)
  2202. count_axis = {"x": "y", "y": "x"}[p.orient]
  2203. if p.input_format == "wide":
  2204. p.plot_data[count_axis] = 1
  2205. _check_argument("stat", ["count", "percent", "probability", "proportion"], stat)
  2206. p.variables[count_axis] = stat
  2207. if stat != "count":
  2208. denom = 100 if stat == "percent" else 1
  2209. p.plot_data[count_axis] /= len(p.plot_data) / denom
  2210. aggregator = EstimateAggregator("sum", errorbar=None)
  2211. p.plot_bars(
  2212. aggregator=aggregator,
  2213. dodge=dodge,
  2214. width=width,
  2215. gap=gap,
  2216. color=color,
  2217. fill=fill,
  2218. capsize=0,
  2219. err_kws={},
  2220. plot_kws=kwargs,
  2221. )
  2222. p._add_axis_labels(ax)
  2223. p._adjust_cat_axis(ax, axis=p.orient)
  2224. return ax
  2225. countplot.__doc__ = dedent("""\
  2226. Show the counts of observations in each categorical bin using bars.
  2227. A count plot can be thought of as a histogram across a categorical, instead
  2228. of quantitative, variable. The basic API and options are identical to those
  2229. for :func:`barplot`, so you can compare counts across nested variables.
  2230. Note that the :func:`histplot` function offers similar functionality with additional
  2231. features (e.g. bar stacking), although its default behavior is somewhat different.
  2232. {categorical_narrative}
  2233. Parameters
  2234. ----------
  2235. {categorical_data}
  2236. {input_params}
  2237. {order_vars}
  2238. {orient}
  2239. {color}
  2240. {palette}
  2241. {saturation}
  2242. {hue_norm}
  2243. stat : {{'count', 'percent', 'proportion', 'probability'}}
  2244. Statistic to compute; when not `'count'`, bar heights will be normalized so that
  2245. they sum to 100 (for `'percent'`) or 1 (otherwise) across the plot.
  2246. .. versionadded:: v0.13.0
  2247. {width}
  2248. {dodge}
  2249. {log_scale}
  2250. {native_scale}
  2251. {formatter}
  2252. {legend}
  2253. {ax_in}
  2254. kwargs : key, value mappings
  2255. Other parameters are passed through to :class:`matplotlib.patches.Rectangle`.
  2256. Returns
  2257. -------
  2258. {ax_out}
  2259. See Also
  2260. --------
  2261. histplot : Bin and count observations with additional options.
  2262. {barplot}
  2263. {catplot}
  2264. Examples
  2265. --------
  2266. .. include:: ../docstrings/countplot.rst
  2267. """).format(**_categorical_docs)
  2268. def catplot(
  2269. data=None, *, x=None, y=None, hue=None, row=None, col=None, kind="strip",
  2270. estimator="mean", errorbar=("ci", 95), n_boot=1000, units=None, seed=None,
  2271. order=None, hue_order=None, row_order=None, col_order=None, col_wrap=None,
  2272. height=5, aspect=1, log_scale=None, native_scale=False, formatter=None,
  2273. orient=None, color=None, palette=None, hue_norm=None, legend="auto",
  2274. legend_out=True, sharex=True, sharey=True, margin_titles=False, facet_kws=None,
  2275. ci=deprecated, **kwargs
  2276. ):
  2277. # Check for attempt to plot onto specific axes and warn
  2278. if "ax" in kwargs:
  2279. msg = ("catplot is a figure-level function and does not accept "
  2280. f"target axes. You may wish to try {kind}plot")
  2281. warnings.warn(msg, UserWarning)
  2282. kwargs.pop("ax")
  2283. desaturated_kinds = ["bar", "count", "box", "violin", "boxen"]
  2284. undodged_kinds = ["strip", "swarm", "point"]
  2285. if kind in ["bar", "point", "count"]:
  2286. Plotter = _CategoricalAggPlotter
  2287. else:
  2288. Plotter = _CategoricalPlotter
  2289. if kind == "count":
  2290. if x is None and y is not None:
  2291. orient = "y"
  2292. x = 1
  2293. elif x is not None and y is None:
  2294. orient = "x"
  2295. y = 1
  2296. elif x is not None and y is not None:
  2297. raise ValueError("Cannot pass values for both `x` and `y`.")
  2298. p = Plotter(
  2299. data=data,
  2300. variables=dict(x=x, y=y, hue=hue, row=row, col=col, units=units),
  2301. order=order,
  2302. orient=orient,
  2303. # Handle special backwards compatibility where pointplot originally
  2304. # did *not* default to multi-colored unless a palette was specified.
  2305. color="C0" if kind == "point" and palette is None and color is None else color,
  2306. legend=legend,
  2307. )
  2308. for var in ["row", "col"]:
  2309. # Handle faceting variables that lack name information
  2310. if var in p.variables and p.variables[var] is None:
  2311. p.variables[var] = f"_{var}_"
  2312. # Adapt the plot_data dataframe for use with FacetGrid
  2313. facet_data = p.plot_data.rename(columns=p.variables)
  2314. facet_data = facet_data.loc[:, ~facet_data.columns.duplicated()]
  2315. col_name = p.variables.get("col", None)
  2316. row_name = p.variables.get("row", None)
  2317. if facet_kws is None:
  2318. facet_kws = {}
  2319. g = FacetGrid(
  2320. data=facet_data, row=row_name, col=col_name, col_wrap=col_wrap,
  2321. row_order=row_order, col_order=col_order, sharex=sharex, sharey=sharey,
  2322. legend_out=legend_out, margin_titles=margin_titles,
  2323. height=height, aspect=aspect,
  2324. **facet_kws,
  2325. )
  2326. # Capture this here because scale_categorical is going to insert a (null)
  2327. # x variable even if it is empty. It's not clear whether that needs to
  2328. # happen or if disabling that is the cleaner solution.
  2329. has_xy_data = p.has_xy_data
  2330. if not native_scale or p.var_types[p.orient] == "categorical":
  2331. p.scale_categorical(p.orient, order=order, formatter=formatter)
  2332. p._attach(g, log_scale=log_scale)
  2333. if not has_xy_data:
  2334. return g
  2335. # Deprecations to remove in v0.14.0.
  2336. hue_order = p._palette_without_hue_backcompat(palette, hue_order)
  2337. palette, hue_order = p._hue_backcompat(color, palette, hue_order)
    # Other deprecations
  2339. errorbar = utils._deprecate_ci(errorbar, ci)
  2340. saturation = kwargs.pop(
  2341. "saturation",
  2342. 0.75 if kind in desaturated_kinds and kwargs.get("fill", True) else 1
  2343. )
  2344. p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
  2345. # Set a default color
  2346. # Otherwise each artist will be plotted separately and trip the color cycle
  2347. if hue is None:
  2348. color = "C0" if color is None else color
  2349. if saturation < 1:
  2350. color = desaturate(color, saturation)
  2351. edgecolor = p._complement_color(kwargs.pop("edgecolor", default), color, p._hue_map)
  2352. width = kwargs.pop("width", 0.8)
  2353. dodge = kwargs.pop("dodge", False if kind in undodged_kinds else "auto")
  2354. if dodge == "auto":
  2355. dodge = p._dodge_needed()
  2356. if kind == "strip":
  2357. jitter = kwargs.pop("jitter", True)
  2358. plot_kws = kwargs.copy()
  2359. plot_kws["edgecolor"] = edgecolor
  2360. plot_kws.setdefault("zorder", 3)
  2361. plot_kws.setdefault("linewidth", 0)
  2362. if "s" not in plot_kws:
  2363. plot_kws["s"] = plot_kws.pop("size", 5) ** 2
  2364. p.plot_strips(
  2365. jitter=jitter,
  2366. dodge=dodge,
  2367. color=color,
  2368. plot_kws=plot_kws,
  2369. )
  2370. elif kind == "swarm":
  2371. warn_thresh = kwargs.pop("warn_thresh", .05)
  2372. plot_kws = kwargs.copy()
  2373. plot_kws["edgecolor"] = edgecolor
  2374. plot_kws.setdefault("zorder", 3)
  2375. if "s" not in plot_kws:
  2376. plot_kws["s"] = plot_kws.pop("size", 5) ** 2
  2377. if plot_kws.setdefault("linewidth", 0) is None:
  2378. plot_kws["linewidth"] = np.sqrt(plot_kws["s"]) / 10
  2379. p.plot_swarms(
  2380. dodge=dodge,
  2381. color=color,
  2382. warn_thresh=warn_thresh,
  2383. plot_kws=plot_kws,
  2384. )
  2385. elif kind == "box":
  2386. plot_kws = kwargs.copy()
  2387. gap = plot_kws.pop("gap", 0)
  2388. fill = plot_kws.pop("fill", True)
  2389. whis = plot_kws.pop("whis", 1.5)
  2390. linewidth = plot_kws.pop("linewidth", None)
  2391. fliersize = plot_kws.pop("fliersize", 5)
  2392. linecolor = p._complement_color(
  2393. plot_kws.pop("linecolor", "auto"), color, p._hue_map
  2394. )
  2395. p.plot_boxes(
  2396. width=width,
  2397. dodge=dodge,
  2398. gap=gap,
  2399. fill=fill,
  2400. whis=whis,
  2401. color=color,
  2402. linecolor=linecolor,
  2403. linewidth=linewidth,
  2404. fliersize=fliersize,
  2405. plot_kws=plot_kws,
  2406. )
  2407. elif kind == "violin":
  2408. plot_kws = kwargs.copy()
  2409. gap = plot_kws.pop("gap", 0)
  2410. fill = plot_kws.pop("fill", True)
  2411. split = plot_kws.pop("split", False)
  2412. inner = plot_kws.pop("inner", "box")
  2413. density_norm = plot_kws.pop("density_norm", "area")
  2414. common_norm = plot_kws.pop("common_norm", False)
  2415. scale = plot_kws.pop("scale", deprecated)
  2416. scale_hue = plot_kws.pop("scale_hue", deprecated)
  2417. density_norm, common_norm = p._violin_scale_backcompat(
  2418. scale, scale_hue, density_norm, common_norm,
  2419. )
  2420. bw_method = p._violin_bw_backcompat(
  2421. plot_kws.pop("bw", deprecated), plot_kws.pop("bw_method", "scott")
  2422. )
  2423. kde_kws = dict(
  2424. cut=plot_kws.pop("cut", 2),
  2425. gridsize=plot_kws.pop("gridsize", 100),
  2426. bw_adjust=plot_kws.pop("bw_adjust", 1),
  2427. bw_method=bw_method,
  2428. )
  2429. inner_kws = plot_kws.pop("inner_kws", {}).copy()
  2430. linewidth = plot_kws.pop("linewidth", None)
  2431. linecolor = plot_kws.pop("linecolor", "auto")
  2432. linecolor = p._complement_color(linecolor, color, p._hue_map)
  2433. p.plot_violins(
  2434. width=width,
  2435. dodge=dodge,
  2436. gap=gap,
  2437. split=split,
  2438. color=color,
  2439. fill=fill,
  2440. linecolor=linecolor,
  2441. linewidth=linewidth,
  2442. inner=inner,
  2443. density_norm=density_norm,
  2444. common_norm=common_norm,
  2445. kde_kws=kde_kws,
  2446. inner_kws=inner_kws,
  2447. plot_kws=plot_kws,
  2448. )
  2449. elif kind == "boxen":
  2450. plot_kws = kwargs.copy()
  2451. gap = plot_kws.pop("gap", 0)
  2452. fill = plot_kws.pop("fill", True)
  2453. linecolor = plot_kws.pop("linecolor", "auto")
  2454. linewidth = plot_kws.pop("linewidth", None)
  2455. k_depth = plot_kws.pop("k_depth", "tukey")
  2456. width_method = plot_kws.pop("width_method", "exponential")
  2457. outlier_prop = plot_kws.pop("outlier_prop", 0.007)
  2458. trust_alpha = plot_kws.pop("trust_alpha", 0.05)
  2459. showfliers = plot_kws.pop("showfliers", True)
  2460. box_kws = plot_kws.pop("box_kws", {})
  2461. flier_kws = plot_kws.pop("flier_kws", {})
  2462. line_kws = plot_kws.pop("line_kws", {})
  2463. if "scale" in plot_kws:
  2464. width_method = p._boxen_scale_backcompat(
  2465. plot_kws["scale"], width_method
  2466. )
  2467. linecolor = p._complement_color(linecolor, color, p._hue_map)
  2468. p.plot_boxens(
  2469. width=width,
  2470. dodge=dodge,
  2471. gap=gap,
  2472. fill=fill,
  2473. color=color,
  2474. linecolor=linecolor,
  2475. linewidth=linewidth,
  2476. width_method=width_method,
  2477. k_depth=k_depth,
  2478. outlier_prop=outlier_prop,
  2479. trust_alpha=trust_alpha,
  2480. showfliers=showfliers,
  2481. box_kws=box_kws,
  2482. flier_kws=flier_kws,
  2483. line_kws=line_kws,
  2484. plot_kws=plot_kws,
  2485. )
  2486. elif kind == "point":
  2487. aggregator = EstimateAggregator(
  2488. estimator, errorbar, n_boot=n_boot, seed=seed
  2489. )
  2490. markers = kwargs.pop("markers", default)
  2491. linestyles = kwargs.pop("linestyles", default)
  2492. # Deprecations to remove in v0.15.0.
  2493. # TODO Uncomment when removing deprecation backcompat
  2494. # capsize = kwargs.pop("capsize", 0)
  2495. # err_kws = _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D)
  2496. p._point_kwargs_backcompat(
  2497. kwargs.pop("scale", deprecated),
  2498. kwargs.pop("join", deprecated),
  2499. kwargs
  2500. )
  2501. err_kws, capsize = p._err_kws_backcompat(
  2502. _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D),
  2503. None,
  2504. errwidth=kwargs.pop("errwidth", deprecated),
  2505. capsize=kwargs.pop("capsize", 0),
  2506. )
  2507. p.plot_points(
  2508. aggregator=aggregator,
  2509. markers=markers,
  2510. linestyles=linestyles,
  2511. dodge=dodge,
  2512. color=color,
  2513. capsize=capsize,
  2514. err_kws=err_kws,
  2515. plot_kws=kwargs,
  2516. )
  2517. elif kind == "bar":
  2518. aggregator = EstimateAggregator(
  2519. estimator, errorbar, n_boot=n_boot, seed=seed
  2520. )
  2521. err_kws, capsize = p._err_kws_backcompat(
  2522. _normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D),
  2523. errcolor=kwargs.pop("errcolor", deprecated),
  2524. errwidth=kwargs.pop("errwidth", deprecated),
  2525. capsize=kwargs.pop("capsize", 0),
  2526. )
  2527. gap = kwargs.pop("gap", 0)
  2528. fill = kwargs.pop("fill", True)
  2529. p.plot_bars(
  2530. aggregator=aggregator,
  2531. dodge=dodge,
  2532. width=width,
  2533. gap=gap,
  2534. color=color,
  2535. fill=fill,
  2536. capsize=capsize,
  2537. err_kws=err_kws,
  2538. plot_kws=kwargs,
  2539. )
  2540. elif kind == "count":
  2541. aggregator = EstimateAggregator("sum", errorbar=None)
  2542. count_axis = {"x": "y", "y": "x"}[p.orient]
  2543. p.plot_data[count_axis] = 1
  2544. stat_options = ["count", "percent", "probability", "proportion"]
  2545. stat = _check_argument("stat", stat_options, kwargs.pop("stat", "count"))
  2546. p.variables[count_axis] = stat
  2547. if stat != "count":
  2548. denom = 100 if stat == "percent" else 1
  2549. p.plot_data[count_axis] /= len(p.plot_data) / denom
  2550. gap = kwargs.pop("gap", 0)
  2551. fill = kwargs.pop("fill", True)
  2552. p.plot_bars(
  2553. aggregator=aggregator,
  2554. dodge=dodge,
  2555. width=width,
  2556. gap=gap,
  2557. color=color,
  2558. fill=fill,
  2559. capsize=0,
  2560. err_kws={},
  2561. plot_kws=kwargs,
  2562. )
  2563. else:
  2564. msg = (
  2565. f"Invalid `kind`: {kind!r}. Options are 'strip', 'swarm', "
  2566. "'box', 'boxen', 'violin', 'bar', 'count', and 'point'."
  2567. )
  2568. raise ValueError(msg)
  2569. for ax in g.axes.flat:
  2570. p._adjust_cat_axis(ax, axis=p.orient)
  2571. g.set_axis_labels(p.variables.get("x"), p.variables.get("y"))
  2572. g.set_titles()
  2573. g.tight_layout()
  2574. for ax in g.axes.flat:
  2575. g._update_legend_data(ax)
  2576. ax.legend_ = None
  2577. if legend and "hue" in p.variables and p.input_format == "long":
  2578. g.add_legend(title=p.variables.get("hue"), label_order=hue_order)
  2579. if data is not None:
  2580. # Replace the dataframe on the FacetGrid for any subsequent maps
  2581. g.data = data
  2582. return g
  2583. catplot.__doc__ = dedent("""\
  2584. Figure-level interface for drawing categorical plots onto a FacetGrid.
  2585. This function provides access to several axes-level functions that
  2586. show the relationship between a numerical and one or more categorical
  2587. variables using one of several visual representations. The `kind`
  2588. parameter selects the underlying axes-level function to use.
  2589. Categorical scatterplots:
  2590. - :func:`stripplot` (with `kind="strip"`; the default)
  2591. - :func:`swarmplot` (with `kind="swarm"`)
  2592. Categorical distribution plots:
  2593. - :func:`boxplot` (with `kind="box"`)
  2594. - :func:`violinplot` (with `kind="violin"`)
  2595. - :func:`boxenplot` (with `kind="boxen"`)
  2596. Categorical estimate plots:
  2597. - :func:`pointplot` (with `kind="point"`)
  2598. - :func:`barplot` (with `kind="bar"`)
  2599. - :func:`countplot` (with `kind="count"`)
  2600. Extra keyword arguments are passed to the underlying function, so you
  2601. should refer to the documentation for each to see kind-specific options.
  2602. {categorical_narrative}
  2603. After plotting, the :class:`FacetGrid` with the plot is returned and can
  2604. be used directly to tweak supporting plot details or add other layers.
  2605. Parameters
  2606. ----------
  2607. {categorical_data}
  2608. {input_params}
  2609. row, col : names of variables in `data` or vector data
  2610. Categorical variables that will determine the faceting of the grid.
  2611. kind : str
  2612. The kind of plot to draw, corresponds to the name of a categorical
  2613. axes-level plotting function. Options are: "strip", "swarm", "box", "violin",
  2614. "boxen", "point", "bar", or "count".
  2615. {stat_api_params}
  2616. {order_vars}
  2617. row_order, col_order : lists of strings
  2618. Order to organize the rows and/or columns of the grid in; otherwise the
  2619. orders are inferred from the data objects.
  2620. {col_wrap}
  2621. {height}
  2622. {aspect}
  2623. {native_scale}
  2624. {formatter}
  2625. {orient}
  2626. {color}
  2627. {palette}
  2628. {hue_norm}
  2629. {legend}
  2630. {legend_out}
  2631. {share_xy}
  2632. {margin_titles}
  2633. facet_kws : dict
  2634. Dictionary of other keyword arguments to pass to :class:`FacetGrid`.
  2635. kwargs : key, value pairings
  2636. Other keyword arguments are passed through to the underlying plotting
  2637. function.
  2638. Returns
  2639. -------
  2640. :class:`FacetGrid`
  2641. Returns the :class:`FacetGrid` object with the plot on it for further
  2642. tweaking.
  2643. Examples
  2644. --------
  2645. .. include:: ../docstrings/catplot.rst
  2646. """).format(**_categorical_docs)
  2647. class Beeswarm:
  2648. """Modifies a scatterplot artist to show a beeswarm plot."""
  2649. def __init__(self, orient="x", width=0.8, warn_thresh=.05):
  2650. self.orient = orient
  2651. self.width = width
  2652. self.warn_thresh = warn_thresh
  2653. def __call__(self, points, center):
  2654. """Swarm `points`, a PathCollection, around the `center` position."""
  2655. # Convert from point size (area) to diameter
  2656. ax = points.axes
  2657. dpi = ax.figure.dpi
  2658. # Get the original positions of the points
  2659. orig_xy_data = points.get_offsets()
  2660. # Reset the categorical positions to the center line
  2661. cat_idx = 1 if self.orient == "y" else 0
  2662. orig_xy_data[:, cat_idx] = center
  2663. # Transform the data coordinates to point coordinates.
  2664. # We'll figure out the swarm positions in the latter
  2665. # and then convert back to data coordinates and replot
  2666. orig_x_data, orig_y_data = orig_xy_data.T
  2667. orig_xy = ax.transData.transform(orig_xy_data)
  2668. # Order the variables so that x is the categorical axis
  2669. if self.orient == "y":
  2670. orig_xy = orig_xy[:, [1, 0]]
  2671. # Add a column with each point's radius
  2672. sizes = points.get_sizes()
  2673. if sizes.size == 1:
  2674. sizes = np.repeat(sizes, orig_xy.shape[0])
  2675. edge = points.get_linewidth().item()
  2676. radii = (np.sqrt(sizes) + edge) / 2 * (dpi / 72)
  2677. orig_xy = np.c_[orig_xy, radii]
  2678. # Sort along the value axis to facilitate the beeswarm
  2679. sorter = np.argsort(orig_xy[:, 1])
  2680. orig_xyr = orig_xy[sorter]
  2681. # Adjust points along the categorical axis to prevent overlaps
  2682. new_xyr = np.empty_like(orig_xyr)
  2683. new_xyr[sorter] = self.beeswarm(orig_xyr)
  2684. # Transform the point coordinates back to data coordinates
  2685. if self.orient == "y":
  2686. new_xy = new_xyr[:, [1, 0]]
  2687. else:
  2688. new_xy = new_xyr[:, :2]
  2689. new_x_data, new_y_data = ax.transData.inverted().transform(new_xy).T
  2690. # Add gutters
  2691. t_fwd, t_inv = _get_transform_functions(ax, self.orient)
  2692. if self.orient == "y":
  2693. self.add_gutters(new_y_data, center, t_fwd, t_inv)
  2694. else:
  2695. self.add_gutters(new_x_data, center, t_fwd, t_inv)
  2696. # Reposition the points so they do not overlap
  2697. if self.orient == "y":
  2698. points.set_offsets(np.c_[orig_x_data, new_y_data])
  2699. else:
  2700. points.set_offsets(np.c_[new_x_data, orig_y_data])
  2701. def beeswarm(self, orig_xyr):
  2702. """Adjust x position of points to avoid overlaps."""
  2703. # In this method, `x` is always the categorical axis
  2704. # Center of the swarm, in point coordinates
  2705. midline = orig_xyr[0, 0]
  2706. # Start the swarm with the first point
  2707. swarm = np.atleast_2d(orig_xyr[0])
  2708. # Loop over the remaining points
  2709. for xyr_i in orig_xyr[1:]:
  2710. # Find the points in the swarm that could possibly
  2711. # overlap with the point we are currently placing
  2712. neighbors = self.could_overlap(xyr_i, swarm)
  2713. # Find positions that would be valid individually
  2714. # with respect to each of the swarm neighbors
  2715. candidates = self.position_candidates(xyr_i, neighbors)
  2716. # Sort candidates by their centrality
  2717. offsets = np.abs(candidates[:, 0] - midline)
  2718. candidates = candidates[np.argsort(offsets)]
  2719. # Find the first candidate that does not overlap any neighbors
  2720. new_xyr_i = self.first_non_overlapping_candidate(candidates, neighbors)
  2721. # Place it into the swarm
  2722. swarm = np.vstack([swarm, new_xyr_i])
  2723. return swarm
  2724. def could_overlap(self, xyr_i, swarm):
  2725. """Return a list of all swarm points that could overlap with target."""
  2726. # Because we work backwards through the swarm and can short-circuit,
  2727. # the for-loop is faster than vectorization
  2728. _, y_i, r_i = xyr_i
  2729. neighbors = []
  2730. for xyr_j in reversed(swarm):
  2731. _, y_j, r_j = xyr_j
  2732. if (y_i - y_j) < (r_i + r_j):
  2733. neighbors.append(xyr_j)
  2734. else:
  2735. break
  2736. return np.array(neighbors)[::-1]
  2737. def position_candidates(self, xyr_i, neighbors):
  2738. """Return a list of coordinates that might be valid by adjusting x."""
  2739. candidates = [xyr_i]
  2740. x_i, y_i, r_i = xyr_i
  2741. left_first = True
  2742. for x_j, y_j, r_j in neighbors:
  2743. dy = y_i - y_j
  2744. dx = np.sqrt(max((r_i + r_j) ** 2 - dy ** 2, 0)) * 1.05
  2745. cl, cr = (x_j - dx, y_i, r_i), (x_j + dx, y_i, r_i)
  2746. if left_first:
  2747. new_candidates = [cl, cr]
  2748. else:
  2749. new_candidates = [cr, cl]
  2750. candidates.extend(new_candidates)
  2751. left_first = not left_first
  2752. return np.array(candidates)
  2753. def first_non_overlapping_candidate(self, candidates, neighbors):
  2754. """Find the first candidate that does not overlap with the swarm."""
  2755. # If we have no neighbors, all candidates are good.
  2756. if len(neighbors) == 0:
  2757. return candidates[0]
  2758. neighbors_x = neighbors[:, 0]
  2759. neighbors_y = neighbors[:, 1]
  2760. neighbors_r = neighbors[:, 2]
  2761. for xyr_i in candidates:
  2762. x_i, y_i, r_i = xyr_i
  2763. dx = neighbors_x - x_i
  2764. dy = neighbors_y - y_i
  2765. sq_distances = np.square(dx) + np.square(dy)
  2766. sep_needed = np.square(neighbors_r + r_i)
  2767. # Good candidate does not overlap any of neighbors which means that
  2768. # squared distance between candidate and any of the neighbors has
  2769. # to be at least square of the summed radii
  2770. good_candidate = np.all(sq_distances >= sep_needed)
  2771. if good_candidate:
  2772. return xyr_i
  2773. raise RuntimeError(
  2774. "No non-overlapping candidates found. This should not happen."
  2775. )
  2776. def add_gutters(self, points, center, trans_fwd, trans_inv):
  2777. """Stop points from extending beyond their territory."""
  2778. half_width = self.width / 2
  2779. low_gutter = trans_inv(trans_fwd(center) - half_width)
  2780. off_low = points < low_gutter
  2781. if off_low.any():
  2782. points[off_low] = low_gutter
  2783. high_gutter = trans_inv(trans_fwd(center) + half_width)
  2784. off_high = points > high_gutter
  2785. if off_high.any():
  2786. points[off_high] = high_gutter
  2787. gutter_prop = (off_high + off_low).sum() / len(points)
  2788. if gutter_prop > self.warn_thresh:
  2789. msg = (
  2790. "{:.1%} of the points cannot be placed; you may want "
  2791. "to decrease the size of the markers or use stripplot."
  2792. ).format(gutter_prop)
  2793. warnings.warn(msg, UserWarning)
  2794. return points
  2795. BoxPlotArtists = namedtuple("BoxPlotArtists", "box median whiskers caps fliers mean")
  2796. class BoxPlotContainer:
  2797. def __init__(self, artist_dict):
  2798. self.boxes = artist_dict["boxes"]
  2799. self.medians = artist_dict["medians"]
  2800. self.whiskers = artist_dict["whiskers"]
  2801. self.caps = artist_dict["caps"]
  2802. self.fliers = artist_dict["fliers"]
  2803. self.means = artist_dict["means"]
  2804. self._label = None
  2805. self._children = [
  2806. *self.boxes,
  2807. *self.medians,
  2808. *self.whiskers,
  2809. *self.caps,
  2810. *self.fliers,
  2811. *self.means,
  2812. ]
  2813. def __repr__(self):
  2814. return f"<BoxPlotContainer object with {len(self.boxes)} boxes>"
  2815. def __getitem__(self, idx):
  2816. pair_slice = slice(2 * idx, 2 * idx + 2)
  2817. return BoxPlotArtists(
  2818. self.boxes[idx] if self.boxes else [],
  2819. self.medians[idx] if self.medians else [],
  2820. self.whiskers[pair_slice] if self.whiskers else [],
  2821. self.caps[pair_slice] if self.caps else [],
  2822. self.fliers[idx] if self.fliers else [],
  2823. self.means[idx]if self.means else [],
  2824. )
  2825. def __iter__(self):
  2826. yield from (self[i] for i in range(len(self.boxes)))
  2827. def get_label(self):
  2828. return self._label
  2829. def set_label(self, value):
  2830. self._label = value
  2831. def get_children(self):
  2832. return self._children
  2833. def remove(self):
  2834. for child in self._children:
  2835. child.remove()