12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531 |
- """Plotting functions for visualizing distributions."""
- from numbers import Number
- from functools import partial
- import math
- import textwrap
- import warnings
- import numpy as np
- import pandas as pd
- import matplotlib as mpl
- import matplotlib.pyplot as plt
- import matplotlib.transforms as tx
- from matplotlib.colors import to_rgba
- from matplotlib.collections import LineCollection
- from ._base import VectorPlotter
- # We have moved univariate histogram computation over to the new Hist class,
- # but still use the older Histogram for bivariate computation.
- from ._statistics import ECDF, Histogram, KDE
- from ._stats.counting import Hist
- from .axisgrid import (
- FacetGrid,
- _facet_docs,
- )
- from .utils import (
- remove_na,
- _get_transform_functions,
- _kde_support,
- _normalize_kwargs,
- _check_argument,
- _assign_default_kwargs,
- _default_color,
- )
- from .palettes import color_palette
- from .external import husl
- from .external.kde import gaussian_kde
- from ._docstrings import (
- DocstringComponents,
- _core_docs,
- )
- __all__ = ["displot", "histplot", "kdeplot", "ecdfplot", "rugplot", "distplot"]
- # ==================================================================================== #
- # Module documentation
- # ==================================================================================== #
- _dist_params = dict(
- multiple="""
- multiple : {{"layer", "stack", "fill"}}
- Method for drawing multiple elements when semantic mapping creates subsets.
- Only relevant with univariate data.
- """,
- log_scale="""
- log_scale : bool or number, or pair of bools or numbers
- Set axis scale(s) to log. A single value sets the data axis for any numeric
- axes in the plot. A pair of values sets each axis independently.
- Numeric values are interpreted as the desired base (default 10).
- When `None` or `False`, seaborn defers to the existing Axes scale.
- """,
- legend="""
- legend : bool
- If False, suppress the legend for semantic variables.
- """,
- cbar="""
- cbar : bool
- If True, add a colorbar to annotate the color mapping in a bivariate plot.
- Note: Does not currently support plots with a ``hue`` variable well.
- """,
- cbar_ax="""
- cbar_ax : :class:`matplotlib.axes.Axes`
- Pre-existing axes for the colorbar.
- """,
- cbar_kws="""
- cbar_kws : dict
- Additional parameters passed to :meth:`matplotlib.figure.Figure.colorbar`.
- """,
- )
# Bundle every family of shared parameter descriptions under one object:
# the core and facet docs, the distribution-specific fragments defined in
# this module, and the parameters read off the estimator constructors.
_param_docs = DocstringComponents.from_nested_components(**{
    "core": _core_docs["params"],
    "facets": DocstringComponents(_facet_docs),
    "dist": DocstringComponents(_dist_params),
    "kde": DocstringComponents.from_function_params(KDE.__init__),
    "hist": DocstringComponents.from_function_params(Histogram.__init__),
    "ecdf": DocstringComponents.from_function_params(ECDF.__init__),
})
- # ==================================================================================== #
- # Internal API
- # ==================================================================================== #
- class _DistributionPlotter(VectorPlotter):
- wide_structure = {"x": "@values", "hue": "@columns"}
- flat_structure = {"x": "@values"}
def __init__(
    self,
    data=None,
    variables=None,
):
    """Initialize the plotter by delegating to the parent constructor.

    Parameters
    ----------
    data : object, optional
        Input data, forwarded unchanged to the parent initializer.
    variables : dict, optional
        Variable assignments, forwarded to the parent initializer.
        ``None`` (the default) is treated as an empty mapping.
    """
    # Create the empty mapping per call; a ``{}`` default in the signature
    # would be a single dict object shared by every instance.
    variables = {} if variables is None else variables
    super().__init__(data=data, variables=variables)
@property
def univariate(self):
    """Return True unless both ``x`` and ``y`` have been assigned."""
    # TODO this could move down to core, but it lives here for now.
    # The concept should stay restricted to the x/y pair: other semantic
    # variables may be present without affecting the answer.
    # There is no settled conceptual name for "x/y" yet.
    positional = ("x", "y")
    return not all(axis in self.variables for axis in positional)
@property
def data_variable(self):
    """Return whichever of ``x``/``y`` is assigned in a univariate plot.

    Raises
    ------
    AttributeError
        If the plot is not univariate.
    """
    # TODO This could also be in core, but it should have a better name.
    if self.univariate:
        assigned = {"x", "y"} & set(self.variables)
        return assigned.pop()
    raise AttributeError("This is not a univariate plot")
@property
def has_xy_data(self):
    """Return True when at least one of ``x`` or ``y`` is assigned."""
    # TODO same open question as ``univariate`` about where this belongs
    return any(axis in self.variables for axis in ("x", "y"))
def _add_legend(
    self,
    ax_obj, artist, fill, element, multiple, alpha, artist_kws, legend_kws,
):
    """Create one proxy artist per hue level and show them in a legend.

    ``artist`` is called with the keyword arguments that ``_artist_kws``
    produces for each level's color. The handles are passed to
    ``ax_obj.legend`` when ``ax_obj`` is a matplotlib Axes and to
    ``ax_obj.add_legend`` otherwise; ``legend_kws`` is forwarded either way.
    """
    # TODO numeric hue mappings are not handled like in the relational plots
    handles, labels = [], []
    for level in self._hue_map.levels:
        level_kws = self._artist_kws(
            artist_kws, fill, element, multiple, self._hue_map(level), alpha
        )
        # "color" is injected to work around barplot's color-cycle
        # integration; here the properties are set on the artist directly,
        # where it would cause problems, so discard it.
        if "facecolor" in level_kws:
            level_kws.pop("color", None)
        handles.append(artist(**level_kws))
        labels.append(level)

    if not isinstance(ax_obj, mpl.axes.Axes):
        # i.e. a FacetGrid. TODO make this better
        ax_obj.add_legend(
            dict(zip(labels, handles)),
            title=self.variables["hue"],
            label_order=self.var_levels["hue"],
            **legend_kws
        )
        return

    ax_obj.legend(handles, labels, title=self.variables["hue"], **legend_kws)
def _artist_kws(self, kws, fill, element, multiple, color, alpha):
    """Return a copy of ``kws`` with colors set for a filled or unfilled artist."""
    is_bars = element == "bars"

    if not fill:
        styled = kws.copy()
        rgba = to_rgba(color, alpha)
        if is_bars:
            # Hollow bars: only the outline carries the color
            styled["facecolor"] = "none"
            styled["edgecolor"] = rgba
        else:
            styled["color"] = rgba
        return styled

    styled = _normalize_kwargs(kws.copy(), mpl.collections.PolyCollection)
    styled.setdefault("facecolor", to_rgba(color, alpha))

    if is_bars:
        # Make bar() interface with property cycle correctly
        # https://github.com/matplotlib/matplotlib/issues/19385
        styled["color"] = "none"

    if is_bars or multiple in ["stack", "fill"]:
        default_edge = mpl.rcParams["patch.edgecolor"]
    else:
        default_edge = to_rgba(color, 1)
    styled.setdefault("edgecolor", default_edge)

    return styled
- def _quantile_to_level(self, data, quantile):
- """Return data levels corresponding to quantile cuts of mass."""
- isoprop = np.asarray(quantile)
- values = np.ravel(data)
- sorted_values = np.sort(values)[::-1]
- normalized_values = np.cumsum(sorted_values) / values.sum()
- idx = np.searchsorted(normalized_values, 1 - isoprop)
- levels = np.take(sorted_values, idx, mode="clip")
- return levels
def _cmap_from_color(self, color):
    """Build a 256-entry sequential ``ListedColormap`` seeded by ``color``."""
    # This is broadly useful, but it stays on this class to signal that its
    # design has not been thought through very carefully.
    n_colors = 256
    red, green, blue, _ = to_rgba(color)
    hue, saturation, _ = husl.rgb_to_husl(red, green, blue)

    # Columns are (hue, saturation, lightness): constant hue, saturation
    # modulated by a cosine, lightness ramping linearly from 35 to 80.
    phase = np.linspace(-1, 1, int(1.15 * n_colors))[:n_colors]
    ramp = np.column_stack([
        np.full(n_colors, hue, dtype=float),
        saturation * np.cos(phase),
        np.linspace(35, 80, n_colors),
    ])

    rgb = np.clip([husl.husl_to_rgb(h, s, l) for h, s, l in ramp], 0, 1)
    return mpl.colors.ListedColormap(rgb[::-1])
- def _default_discrete(self):
- """Find default values for discrete hist estimation based on variable type."""
- if self.univariate:
- discrete = self.var_types[self.data_variable] == "categorical"
- else:
- discrete_x = self.var_types["x"] == "categorical"
- discrete_y = self.var_types["y"] == "categorical"
- discrete = discrete_x, discrete_y
- return discrete
- def _resolve_multiple(self, curves, multiple):
- """Modify the density data structure to handle multiple densities."""
- # Default baselines have all densities starting at 0
- baselines = {k: np.zeros_like(v) for k, v in curves.items()}
- # TODO we should have some central clearinghouse for checking if any
- # "grouping" (terminnology?) semantics have been assigned
- if "hue" not in self.variables:
- return curves, baselines
- if multiple in ("stack", "fill"):
- # Setting stack or fill means that the curves share a
- # support grid / set of bin edges, so we can make a dataframe
- # Reverse the column order to plot from top to bottom
- curves = pd.DataFrame(curves).iloc[:, ::-1]
- # Find column groups that are nested within col/row variables
- column_groups = {}
- for i, keyd in enumerate(map(dict, curves.columns)):
- facet_key = keyd.get("col", None), keyd.get("row", None)
- column_groups.setdefault(facet_key, [])
- column_groups[facet_key].append(i)
- baselines = curves.copy()
- for col_idxs in column_groups.values():
- cols = curves.columns[col_idxs]
- norm_constant = curves[cols].sum(axis="columns")
- # Take the cumulative sum to stack
- curves[cols] = curves[cols].cumsum(axis="columns")
- # Normalize by row sum to fill
- if multiple == "fill":
- curves[cols] = curves[cols].div(norm_constant, axis="index")
- # Define where each segment starts
- baselines[cols] = curves[cols].shift(1, axis=1).fillna(0)
- if multiple == "dodge":
- # Account for the unique semantic (non-faceting) levels
- # This will require rethiniking if we add other semantics!
- hue_levels = self.var_levels["hue"]
- n = len(hue_levels)
- f_fwd, f_inv = self._get_scale_transforms(self.data_variable)
- for key in curves:
- level = dict(key)["hue"]
- hist = curves[key].reset_index(name="heights")
- level_idx = hue_levels.index(level)
- a = f_fwd(hist["edges"])
- b = f_fwd(hist["edges"] + hist["widths"])
- w = (b - a) / n
- new_min = f_inv(a + level_idx * w)
- new_max = f_inv(a + (level_idx + 1) * w)
- hist["widths"] = new_max - new_min
- hist["edges"] = new_min
- curves[key] = hist.set_index(["edges", "widths"])["heights"]
- return curves, baselines
- # -------------------------------------------------------------------------------- #
- # Computation
- # -------------------------------------------------------------------------------- #
def _compute_univariate_density(
    self,
    data_variable,
    common_norm,
    common_grid,
    estimate_kws,
    warn_singular=True,
):
    """Estimate a kernel density for every ``hue`` subset of the data.

    Parameters
    ----------
    data_variable : str
        Name of the column holding the observations; also the axis whose
        scale transform is inverted for the support points.
    common_norm : bool
        If True, scale each density by its subset's share of the total
        weight. Forced to False when only x/y variables are assigned.
    common_grid : bool
        If True (and variables beyond x/y are assigned), define the
        estimator's support once from all observations.
    estimate_kws : dict
        Keyword arguments used to construct the ``KDE`` estimator.
    warn_singular : bool
        If True, emit a ``UserWarning`` for each skipped subset.

    Returns
    -------
    dict
        Maps ``tuple(sub_vars.items())`` to a ``pandas.Series`` of density
        values indexed by the support points. Subsets with fewer than two
        observations, zero variance, or a ``LinAlgError`` during
        estimation are left out.
    """
    # Initialize the estimator object
    estimator = KDE(**estimate_kws)

    if set(self.variables) - {"x", "y"}:
        if common_grid:
            all_observations = self.comp_data.dropna()
            estimator.define_support(all_observations[data_variable])
    else:
        common_norm = False

    all_data = self.plot_data.dropna()
    if common_norm and "weights" in all_data:
        whole_weight = all_data["weights"].sum()
    else:
        whole_weight = len(all_data)

    # The warning text does not depend on the subset, so build it once
    singular_msg = (
        "Dataset has 0 variance; skipping density estimate. "
        "Pass `warn_singular=False` to disable this warning."
    )

    densities = {}

    for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True):

        # Extract the data points for this subset
        observations = sub_data[data_variable]

        # Extract the weights for this subset of observations
        if "weights" in self.variables:
            weights = sub_data["weights"]
            part_weight = weights.sum()
        else:
            weights = None
            part_weight = len(sub_data)

        # Estimate the density of observations at this level
        variance = np.nan_to_num(observations.var())
        singular = len(observations) < 2 or math.isclose(variance, 0)
        try:
            if not singular:
                # Convoluted approach needed because numerical failures
                # can manifest in a few different ways.
                density, support = estimator(observations, weights=weights)
        except np.linalg.LinAlgError:
            singular = True

        if singular:
            if warn_singular:
                warnings.warn(singular_msg, UserWarning, stacklevel=4)
            continue

        # Invert the scaling of the support points, along the same variable
        # the density was estimated on (the argument, not the attribute)
        _, f_inv = self._get_scale_transforms(data_variable)
        support = f_inv(support)

        # Apply a scaling factor so that the integral over all subsets is 1
        if common_norm:
            density *= part_weight / whole_weight

        # Store the density for this level
        key = tuple(sub_vars.items())
        densities[key] = pd.Series(density, index=support)

    return densities
- # -------------------------------------------------------------------------------- #
- # Plotting
- # -------------------------------------------------------------------------------- #
def plot_univariate_histogram(
    self,
    multiple,
    element,
    fill,
    common_norm,
    common_bins,
    shrink,
    kde,
    kde_kws,
    color,
    legend,
    line_kws,
    estimate_kws,
    **plot_kws,
):
    """Compute and draw a histogram for each ``hue`` subset of the data.

    Parameters
    ----------
    multiple : {"layer", "stack", "fill", "dodge"}
        How the subsets are combined; validated on entry.
    element : {"bars", "step", "poly"}
        Visual representation of the histogram; validated on entry.
    fill : bool
        Whether the artists are filled.
    common_norm : bool
        If True, scale each histogram by its subset's share of the total
        weight. Disabled when ``stat`` is ``"count"``, when only x/y
        variables are assigned, or when ``common_bins`` is False with
        additional variables assigned.
    common_bins : bool
        If True and variables beyond x/y are assigned, bin parameters are
        defined once from all the data rather than per subset.
    shrink : number
        Factor applied to the bin widths; each bar stays centered in its bin.
    kde : bool
        If True, also draw a density curve rescaled to the histogram.
    kde_kws : dict or None
        Passed to the density estimation; ``cut`` defaults to 0 and
        ``cumulative`` is copied from ``estimate_kws``.
    color : color
        Used for the artists when no ``hue`` variable is assigned.
    legend : bool
        If True and ``hue`` is assigned, add a legend.
    line_kws : dict or None
        Keyword arguments for the density line; its ``color`` is overwritten.
    estimate_kws : dict
        Keyword arguments for the ``Hist`` estimator. The code reads the
        ``stat`` key unconditionally, ``bins``/``binwidth``/``discrete``
        when weights are assigned, and ``cumulative`` when ``kde`` is True.
    **plot_kws
        Artist keyword arguments; ``alpha`` is popped and defaulted here.
    """
    # -- Default keyword dicts
    kde_kws = {} if kde_kws is None else kde_kws.copy()
    line_kws = {} if line_kws is None else line_kws.copy()
    estimate_kws = {} if estimate_kws is None else estimate_kws.copy()

    # -- Input checking
    _check_argument("multiple", ["layer", "stack", "fill", "dodge"], multiple)
    _check_argument("element", ["bars", "step", "poly"], element)

    auto_bins_with_weights = (
        "weights" in self.variables
        and estimate_kws["bins"] == "auto"
        and estimate_kws["binwidth"] is None
        and not estimate_kws["discrete"]
    )
    if auto_bins_with_weights:
        msg = (
            "`bins` cannot be 'auto' when using weights. "
            "Setting `bins=10`, but you will likely want to adjust."
        )
        warnings.warn(msg, UserWarning)
        estimate_kws["bins"] = 10

    # Simplify downstream code if we are not normalizing
    if estimate_kws["stat"] == "count":
        common_norm = False

    # The data variable is fixed for the whole call, so look it up once
    orient = self.data_variable

    # Now initialize the Histogram estimator
    estimator = Hist(**estimate_kws)
    histograms = {}

    # Do pre-compute housekeeping related to multiple groups
    all_data = self.comp_data.dropna()
    all_weights = all_data.get("weights", None)

    multiple_histograms = set(self.variables) - {"x", "y"}
    if multiple_histograms:
        if common_bins:
            bin_kws = estimator._define_bin_params(all_data, orient, None)
    else:
        common_norm = False

    if common_norm and all_weights is not None:
        whole_weight = all_weights.sum()
    else:
        whole_weight = len(all_data)

    # Estimate the smoothed kernel densities, for use later
    if kde:
        # TODO alternatively, clip at min/max bins?
        kde_kws.setdefault("cut", 0)
        kde_kws["cumulative"] = estimate_kws["cumulative"]
        densities = self._compute_univariate_density(
            self.data_variable,
            common_norm,
            common_bins,
            kde_kws,
            warn_singular=False,
        )

    # First pass through the data to compute the histograms
    for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True):

        # Prepare the relevant data
        key = tuple(sub_vars.items())

        if "weights" in self.variables:
            # The estimator is given the column under the name "weight"
            sub_data["weight"] = sub_data.pop("weights")
            part_weight = sub_data["weight"].sum()
        else:
            part_weight = len(sub_data)

        # Do the histogram computation
        if not (multiple_histograms and common_bins):
            bin_kws = estimator._define_bin_params(sub_data, orient, None)
        res = estimator._normalize(estimator._eval(sub_data, orient, bin_kws))
        heights = res[estimator.stat].to_numpy()
        widths = res["space"].to_numpy()
        # ``res[orient]`` holds bin centers; shift to the left edges
        edges = res[orient].to_numpy() - widths / 2

        # Rescale the smoothed curve to match the histogram
        if kde and key in densities:
            if estimator.cumulative:
                hist_norm = heights.max()
            else:
                hist_norm = (heights * widths).sum()
            densities[key] *= hist_norm

        # Convert edges back to original units for plotting
        ax = self._get_axes(sub_vars)
        _, inv = _get_transform_functions(ax, self.data_variable)
        widths = inv(edges + widths) - inv(edges)
        edges = inv(edges)

        # Pack the histogram data and metadata together
        edges = edges + (1 - shrink) / 2 * widths
        widths *= shrink
        index = pd.MultiIndex.from_arrays([
            pd.Index(edges, name="edges"),
            pd.Index(widths, name="widths"),
        ])
        hist = pd.Series(heights, index=index, name="heights")

        # Apply scaling to normalize across groups
        if common_norm:
            hist *= part_weight / whole_weight

        # Store the finalized histogram data for future plotting
        histograms[key] = hist

    # Modify the histogram and density data to resolve multiple groups
    histograms, baselines = self._resolve_multiple(histograms, multiple)
    if kde:
        densities, _ = self._resolve_multiple(
            densities, None if multiple == "dodge" else multiple
        )

    # Set autoscaling-related meta
    sticky_stat = (0, 1) if multiple == "fill" else (0, np.inf)
    if multiple == "fill":
        # Filled plots should not have any margins
        bin_vals = histograms.index.to_frame()
        edges = bin_vals["edges"]
        widths = bin_vals["widths"]
        sticky_data = (
            edges.min(),
            edges.max() + widths.loc[edges.idxmax()]
        )
    else:
        sticky_data = []

    # --- Handle default visual attributes

    # Note: default linewidth is determined after plotting

    # Default alpha should depend on other parameters
    if fill:
        # Note: will need to account for other grouping semantics if added
        if "hue" in self.variables and multiple == "layer":
            default_alpha = .5 if element == "bars" else .25
        elif kde:
            default_alpha = .5
        else:
            default_alpha = .75
    else:
        default_alpha = 1
    alpha = plot_kws.pop("alpha", default_alpha)  # TODO make parameter?

    hist_artists = []

    # Go back through the dataset and draw the plots
    for sub_vars, _ in self.iter_data("hue", reverse=True):

        key = tuple(sub_vars.items())
        hist = histograms[key].rename("heights").reset_index()
        bottom = np.asarray(baselines[key])

        ax = self._get_axes(sub_vars)

        # Define the matplotlib attributes that depend on semantic mapping
        if "hue" in self.variables:
            sub_color = self._hue_map(sub_vars["hue"])
        else:
            sub_color = color

        artist_kws = self._artist_kws(
            plot_kws, fill, element, multiple, sub_color, alpha
        )

        if element == "bars":

            # Use matplotlib bar plotting
            plot_func = ax.bar if self.data_variable == "x" else ax.barh
            artists = plot_func(
                hist["edges"],
                hist["heights"] - bottom,
                hist["widths"],
                bottom,
                align="edge",
                **artist_kws,
            )

            for bar in artists:
                if self.data_variable == "x":
                    bar.sticky_edges.x[:] = sticky_data
                    bar.sticky_edges.y[:] = sticky_stat
                else:
                    bar.sticky_edges.x[:] = sticky_stat
                    bar.sticky_edges.y[:] = sticky_data

            hist_artists.extend(artists)

        else:

            # Use either fill_between or plot to draw hull of histogram
            if element == "step":

                # Repeat the last bin so the final step spans its full width
                final = hist.iloc[-1]
                x = np.append(hist["edges"], final["edges"] + final["widths"])
                y = np.append(hist["heights"], final["heights"])
                b = np.append(bottom, bottom[-1])

                if self.data_variable == "x":
                    step = "post"
                    drawstyle = "steps-post"
                else:
                    step = "post"  # fillbetweenx handles mapping internally
                    drawstyle = "steps-pre"

            elif element == "poly":

                # Connect the bin centers
                x = hist["edges"] + hist["widths"] / 2
                y = hist["heights"]
                b = bottom

                step = None
                drawstyle = None

            if self.data_variable == "x":
                if fill:
                    artist = ax.fill_between(x, b, y, step=step, **artist_kws)
                else:
                    artist, = ax.plot(x, y, drawstyle=drawstyle, **artist_kws)
                artist.sticky_edges.x[:] = sticky_data
                artist.sticky_edges.y[:] = sticky_stat
            else:
                if fill:
                    artist = ax.fill_betweenx(x, b, y, step=step, **artist_kws)
                else:
                    artist, = ax.plot(y, x, drawstyle=drawstyle, **artist_kws)
                artist.sticky_edges.x[:] = sticky_stat
                artist.sticky_edges.y[:] = sticky_data

            hist_artists.append(artist)

        if kde:

            # Add in the density curves; subsets without an estimate
            # have no entry and are skipped

            try:
                density = densities[key]
            except KeyError:
                continue
            support = density.index

            if "x" in self.variables:
                line_args = support, density
                sticky_x, sticky_y = None, (0, np.inf)
            else:
                line_args = density, support
                sticky_x, sticky_y = (0, np.inf), None

            line_kws["color"] = to_rgba(sub_color, 1)
            line, = ax.plot(
                *line_args, **line_kws,
            )

            if sticky_x is not None:
                line.sticky_edges.x[:] = sticky_x
            if sticky_y is not None:
                line.sticky_edges.y[:] = sticky_y

    if element == "bars" and "linewidth" not in plot_kws:

        # Now we handle linewidth, which depends on the scaling of the plot

        # We will base everything on the minimum bin width
        hist_metadata = pd.concat([
            # Use .items for generality over dict or df
            h.index.to_frame() for _, h in histograms.items()
        ]).reset_index(drop=True)
        thin_bar_idx = hist_metadata["widths"].idxmin()
        binwidth = hist_metadata.loc[thin_bar_idx, "widths"]
        left_edge = hist_metadata.loc[thin_bar_idx, "edges"]

        # Set initial value
        default_linewidth = math.inf

        # Loop through subsets based only on facet variables
        for sub_vars, _ in self.iter_data():

            ax = self._get_axes(sub_vars)

            # Needed in some cases to get valid transforms.
            # Innocuous in other cases?
            ax.autoscale_view()

            # Convert binwidth from data coordinates to points
            pts_x, pts_y = 72 / ax.figure.dpi * abs(
                ax.transData.transform([left_edge + binwidth] * 2)
                - ax.transData.transform([left_edge] * 2)
            )
            if self.data_variable == "x":
                binwidth_points = pts_x
            else:
                binwidth_points = pts_y

            # The relative size of the lines depends on the appearance
            # This is a provisional value and may need more tweaking
            default_linewidth = min(.1 * binwidth_points, default_linewidth)

        # Set the attributes
        for bar in hist_artists:

            # Don't let the lines get too thick
            max_linewidth = bar.get_linewidth()
            if not fill:
                max_linewidth *= 1.5

            linewidth = min(default_linewidth, max_linewidth)

            # If not filling, don't let lines disappear
            if not fill:
                min_linewidth = .5
                linewidth = max(linewidth, min_linewidth)

            bar.set_linewidth(linewidth)

    # --- Finalize the plot ----

    # Axis labels
    ax = self.ax if self.ax is not None else self.facets.axes.flat[0]
    default_x = default_y = ""
    if self.data_variable == "x":
        default_y = estimator.stat.capitalize()
    if self.data_variable == "y":
        default_x = estimator.stat.capitalize()
    self._add_axis_labels(ax, default_x, default_y)

    # Legend for semantic variables
    if "hue" in self.variables and legend:

        if fill or element == "bars":
            artist = partial(mpl.patches.Patch)
        else:
            artist = partial(mpl.lines.Line2D, [], [])

        ax_obj = self.ax if self.ax is not None else self.facets
        self._add_legend(
            ax_obj, artist, fill, element, multiple, alpha, plot_kws, {},
        )
def plot_bivariate_histogram(
    self,
    common_bins, common_norm,
    thresh, pthresh, pmax,
    color, legend,
    cbar, cbar_ax, cbar_kws,
    estimate_kws,
    **plot_kws,
):
    """Draw one 2D histogram mesh per hue subset.

    Bin heights come from a ``Histogram`` estimator built from
    ``estimate_kws``; each subset is drawn with ``ax.pcolormesh`` and the
    remaining ``plot_kws`` are forwarded to that call. ``thresh`` /
    ``pthresh`` mask low cells and ``pmax`` caps the colormap, either
    jointly for all subsets or per subset (see ``common_color_norm`` below).
    """
    # Copy so that the caller's colorbar options are never mutated
    cbar_kws = {} if cbar_kws is None else cbar_kws.copy()

    estimator = Histogram(**estimate_kws)

    # Housekeeping that only matters when semantic variables create subsets
    has_subsets = bool(set(self.variables) - {"x", "y"})
    if not has_subsets:
        common_norm = False
    else:
        all_data = self.comp_data.dropna()
        if common_bins:
            estimator.define_bin_params(
                all_data["x"],
                all_data["y"],
                all_data.get("weights", None),
            )

    # -- Collect the heights of every subset to derive shared color limits
    full_heights = [
        estimator(subset["x"], subset["y"], subset.get("weights", None))[0]
        for _, subset in self.iter_data(from_comp_data=True)
    ]

    common_color_norm = not has_subsets or common_norm

    if common_color_norm and pthresh is not None:
        thresh = self._quantile_to_level(full_heights, pthresh)

    plot_kws.setdefault("vmin", 0)
    if not common_color_norm:
        vmax = None
    elif pmax is not None:
        vmax = self._quantile_to_level(full_heights, pmax)
    else:
        vmax = plot_kws.pop("vmax", max(map(np.max, full_heights)))

    # The color cycle is not followed here, as multiple plots are unlikely
    if color is None:
        color = "C0"

    # --- Loop over data (subsets) and draw the histograms
    for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True):

        if sub_data.empty:
            continue

        heights, (x_edges, y_edges) = estimator(
            sub_data["x"],
            sub_data["y"],
            weights=sub_data.get("weights", None),
        )

        ax = self._get_axes(sub_vars)

        # Map the bin edges back out of the scaled (computation) space
        _, inv_x = _get_transform_functions(ax, "x")
        _, inv_y = _get_transform_functions(ax, "y")
        x_edges, y_edges = inv_x(x_edges), inv_y(y_edges)

        # Weight each subset by its share of the observations
        if estimator.stat != "count" and common_norm:
            heights *= len(sub_data) / len(all_data)

        # Resolve the colormap for this artist
        artist_kws = plot_kws.copy()
        if "hue" in self.variables:
            color = self._hue_map(sub_vars["hue"])
            cmap = self._cmap_from_color(color)
        else:
            cmap = artist_kws.pop("cmap", None)
            if isinstance(cmap, str):
                cmap = color_palette(cmap, as_cmap=True)
            elif cmap is None:
                cmap = self._cmap_from_color(color)
        artist_kws["cmap"] = cmap

        # Per-subset color limits, computed on the unmasked heights
        if not common_color_norm:
            if pmax is not None:
                vmax = self._quantile_to_level(heights, pmax)
            if pthresh:
                thresh = self._quantile_to_level(heights, pthresh)

        if vmax is not None:
            artist_kws["vmax"] = vmax

        # Cells at or below the threshold become transparent
        if thresh is not None:
            heights = np.ma.masked_less_equal(heights, thresh)

        # pcolormesh turns the grid off; remember its state to restore it
        x_grid = any(line.get_visible() for line in ax.xaxis.get_gridlines())
        y_grid = any(line.get_visible() for line in ax.yaxis.get_gridlines())

        mesh = ax.pcolormesh(
            x_edges,
            y_edges,
            heights.T,
            **artist_kws,
        )

        # Sticky edges are only wanted when nothing is thresholded away
        if thresh is not None:
            mesh.sticky_edges.x[:] = []
            mesh.sticky_edges.y[:] = []

        # Optional colorbar. With hue this stacks one colorbar per level,
        # each with redundant ticks; sharing ticks nicely is future work.
        if cbar:
            ax.figure.colorbar(mesh, cbar_ax, ax, **cbar_kws)

        # Restore the grid state recorded above
        if x_grid:
            ax.grid(True, axis="x")
        if y_grid:
            ax.grid(True, axis="y")

    # --- Finalize the plot
    ax = self.ax if self.ax is not None else self.facets.axes.flat[0]
    self._add_axis_labels(ax)

    if "hue" in self.variables and legend:
        # TODO if possible, I would like to move the contour
        # intensity information into the legend too and label the
        # iso proportions rather than the raw density values
        legend_artist = partial(mpl.patches.Patch)
        legend_target = self.ax if self.ax is not None else self.facets
        self._add_legend(
            legend_target, legend_artist, True, False, "layer", 1, {}, {},
        )
def plot_univariate_density(
    self,
    multiple,
    common_norm,
    common_grid,
    warn_singular,
    fill,
    color,
    legend,
    estimate_kws,
    **plot_kws,
):
    """Draw one density curve (line or filled area) per hue subset.

    Densities are obtained from ``self._compute_univariate_density`` and
    combined according to ``multiple`` ("layer", "stack" or "fill") by
    ``self._resolve_multiple``. Remaining ``plot_kws`` are normalized
    against the chosen matplotlib artist class and passed to the drawing
    call.
    """
    # Stacked and filled-to-one plots are filled unless told otherwise
    if fill is None:
        fill = multiple in ("stack", "fill")

    # Normalize matplotlib aliases against the artist that will be drawn
    artist_cls = mpl.collections.PolyCollection if fill else mpl.lines.Line2D
    plot_kws = _normalize_kwargs(plot_kws, artist_cls)

    _check_argument("multiple", ["layer", "stack", "fill"], multiple)

    # Stacking only makes sense on a shared evaluation grid
    has_subsets = bool(set(self.variables) - {"x", "y"})
    if has_subsets and multiple in ("stack", "fill"):
        common_grid = True

    densities = self._compute_univariate_density(
        self.data_variable,
        common_norm,
        common_grid,
        estimate_kws,
        warn_singular,
    )

    # Apply the `multiple` rule to get final curves and their baselines
    densities, baselines = self._resolve_multiple(densities, multiple)

    # Sticky edges keep autoscale margins from appearing below the curve;
    # "fill" plots additionally get no margins along the support
    if multiple == "fill":
        sticky_density = (0, 1)
        sticky_support = densities.index.min(), densities.index.max()
    else:
        sticky_density = (0, np.inf)
        sticky_support = []

    if not fill:
        default_alpha = 1
    elif multiple == "layer":
        default_alpha = .25
    else:
        default_alpha = .75
    alpha = plot_kws.pop("alpha", default_alpha)  # TODO make parameter?

    # Iterate backwards so stacked densities read from top-to-bottom
    for sub_vars, _ in self.iter_data("hue", reverse=True):

        key = tuple(sub_vars.items())
        try:
            density = densities[key]
        except KeyError:
            # No density was computed for this level
            continue
        support = density.index
        baseline = baselines[key]

        ax = self._get_axes(sub_vars)

        sub_color = (
            self._hue_map(sub_vars["hue"]) if "hue" in self.variables else color
        )
        artist_kws = self._artist_kws(
            plot_kws, fill, False, multiple, sub_color, alpha
        )

        if "x" in self.variables:
            # Observation values run along the x axis
            if fill:
                artist = ax.fill_between(support, baseline, density, **artist_kws)
            else:
                artist, = ax.plot(support, density, **artist_kws)
            artist.sticky_edges.x[:] = sticky_support
            artist.sticky_edges.y[:] = sticky_density
        else:
            # Observation values run along the y axis
            if fill:
                artist = ax.fill_betweenx(support, baseline, density, **artist_kws)
            else:
                artist, = ax.plot(density, support, **artist_kws)
            artist.sticky_edges.x[:] = sticky_density
            artist.sticky_edges.y[:] = sticky_support

    # --- Finalize the plot ----
    ax = self.ax if self.ax is not None else self.facets.axes.flat[0]
    default_x = default_y = ""
    if self.data_variable == "x":
        default_y = "Density"
    if self.data_variable == "y":
        default_x = "Density"
    self._add_axis_labels(ax, default_x, default_y)

    if "hue" in self.variables and legend:
        if fill:
            legend_artist = partial(mpl.patches.Patch)
        else:
            legend_artist = partial(mpl.lines.Line2D, [], [])
        legend_target = self.ax if self.ax is not None else self.facets
        self._add_legend(
            legend_target, legend_artist, fill, False, multiple, alpha,
            plot_kws, {},
        )
def plot_bivariate_density(
    self,
    common_norm,
    fill,
    levels,
    thresh,
    color,
    legend,
    cbar,
    warn_singular,
    cbar_ax,
    cbar_kws,
    estimate_kws,
    **contour_kws,
):
    """Draw contours of a bivariate KDE for each hue subset.

    A ``KDE`` estimator built from ``estimate_kws`` is evaluated per subset;
    subsets for which it cannot be fit are skipped (optionally with a
    warning). ``levels`` are iso-proportions that are converted to density
    values before being handed to ``ax.contour`` / ``ax.contourf`` together
    with the remaining ``contour_kws``.

    Raises
    ------
    ValueError
        If ``levels`` is a vector with values outside [0, 1].
    """
    contour_kws = contour_kws.copy()

    estimator = KDE(**estimate_kws)

    # Without semantic subsets there is nothing to normalize across
    if not set(self.variables) - {"x", "y"}:
        common_norm = False

    all_data = self.plot_data.dropna()

    # --- First pass: estimate the KDE of every subset
    densities, supports = {}, {}

    for sub_vars, sub_data in self.iter_data("hue", from_comp_data=True):

        xy = sub_data[["x", "y"]]
        min_variance = xy.var().fillna(0).min()

        weights = sub_data["weights"] if "weights" in self.variables else None

        # A zero-variance check does not catch every case where scipy
        # raises, so the estimator error is treated as singular too
        singular = math.isclose(min_variance, 0)
        if not singular:
            try:
                density, support = estimator(xy["x"], xy["y"], weights=weights)
            except np.linalg.LinAlgError:
                singular = True

        if singular:
            msg = (
                "KDE cannot be estimated (0 variance or perfect covariance). "
                "Pass `warn_singular=False` to disable this warning."
            )
            if warn_singular:
                warnings.warn(msg, UserWarning, stacklevel=3)
            continue

        # Map the support grid back to the original scale
        ax = self._get_axes(sub_vars)
        _, inv_x = _get_transform_functions(ax, "x")
        _, inv_y = _get_transform_functions(ax, "y")
        support = inv_x(support[0]), inv_y(support[1])

        # Scale so that the integral over all subsets is 1
        if common_norm:
            density *= len(sub_data) / len(all_data)

        key = tuple(sub_vars.items())
        densities[key] = density
        supports[key] = support

    # --- Define a grid of iso-proportion levels
    if thresh is None:
        thresh = 0
    if isinstance(levels, Number):
        levels = np.linspace(thresh, 1, levels)
    elif min(levels) < 0 or max(levels) > 1:
        raise ValueError("levels must be in [0, 1]")

    # --- Convert iso-proportions to iso-densities
    if common_norm:
        common_levels = self._quantile_to_level(
            list(densities.values()), levels,
        )
        draw_levels = dict.fromkeys(densities, common_levels)
    else:
        draw_levels = {
            key: self._quantile_to_level(dens, levels)
            for key, dens in densities.items()
        }

    # --- Define the coloring of the contours
    if "hue" in self.variables:
        for param in ["cmap", "colors"]:
            if param in contour_kws:
                msg = f"{param} parameter ignored when using hue mapping."
                warnings.warn(msg, UserWarning)
                contour_kws.pop(param)
    else:
        # Fall back to a default coloring derived from `color`
        coloring_given = set(contour_kws) & {"cmap", "colors"}
        if not coloring_given:
            if fill:
                contour_kws["cmap"] = self._cmap_from_color(color)
            else:
                contour_kws["colors"] = [color]

    # Resolve string colormap names through the internal lookup
    cmap = contour_kws.pop("cmap", None)
    if isinstance(cmap, str):
        cmap = color_palette(cmap, as_cmap=True)
    if cmap is not None:
        contour_kws["cmap"] = cmap

    # The label kwarg is unused by the contour functions (but warns)
    contour_kws.pop("label", None)
    if cbar_kws is None:
        cbar_kws = {}

    # --- Second pass: draw the contours
    for sub_vars, _ in self.iter_data("hue"):

        if "hue" in sub_vars:
            color = self._hue_map(sub_vars["hue"])
            if fill:
                contour_kws["cmap"] = self._cmap_from_color(color)
            else:
                contour_kws["colors"] = [color]

        ax = self._get_axes(sub_vars)

        # TODO could add a pcolormesh based option as well
        # Which would look something like element="raster"
        contour_func = ax.contourf if fill else ax.contour

        key = tuple(sub_vars.items())
        if key not in densities:
            continue
        xx, yy = supports[key]

        cset = contour_func(
            xx, yy, densities[key],
            levels=draw_levels[key],
            **contour_kws,
        )

        # Note: the colorbar shows iso densities, not iso proportions
        if cbar:
            ax.figure.colorbar(cset, cbar_ax, ax, **cbar_kws)

    # --- Finalize the plot
    ax = self.ax if self.ax is not None else self.facets.axes.flat[0]
    self._add_axis_labels(ax)

    if "hue" in self.variables and legend:
        # TODO if possible, I would like to move the contour
        # intensity information into the legend too and label the
        # iso proportions rather than the raw density values
        if fill:
            legend_artist = partial(mpl.patches.Patch)
        else:
            legend_artist = partial(mpl.lines.Line2D, [], [])
        legend_target = self.ax if self.ax is not None else self.facets
        self._add_legend(
            legend_target, legend_artist, fill, False, "layer", 1, {}, {},
        )
def plot_univariate_ecdf(self, estimate_kws, legend, **plot_kws):
    """Draw one ECDF step line per hue subset.

    The statistic is computed by an ``ECDF`` estimator built from
    ``estimate_kws``; ``plot_kws`` are forwarded to ``ax.plot`` after the
    draw style is forced to step in the direction of the data variable.
    """
    estimator = ECDF(**estimate_kws)

    # Step the right way for the data variable
    data_var = self.data_variable
    plot_kws["drawstyle"] = {"x": "steps-post", "y": "steps-pre"}[data_var]

    for sub_vars, sub_data in self.iter_data(
        "hue", reverse=True, from_comp_data=True,
    ):

        if sub_data.empty:
            continue

        # Compute the ECDF
        observations = sub_data[self.data_variable]
        stat, vals = estimator(
            observations, weights=sub_data.get("weights", None)
        )

        # Assign attributes based on semantic mapping
        artist_kws = plot_kws.copy()
        if "hue" in self.variables:
            artist_kws["color"] = self._hue_map(sub_vars["hue"])

        # Return the data variable to the linear domain
        ax = self._get_axes(sub_vars)
        _, inv = _get_transform_functions(ax, self.data_variable)
        vals = inv(vals)

        # Manually set the minimum value on a "log" scale
        if isinstance(inv.__self__, mpl.scale.LogTransform):
            vals[0] = -np.inf

        # Orient the curve and note which axis carries the statistic
        if self.data_variable == "x":
            plot_args, stat_variable = (vals, stat), "y"
        else:
            plot_args, stat_variable = (stat, vals), "x"

        top_edge = len(observations) if estimator.stat == "count" else 1

        # Draw the line and pin the statistic axis to its natural range
        artist, = ax.plot(*plot_args, **artist_kws)
        getattr(artist.sticky_edges, stat_variable)[:] = 0, top_edge

    # --- Finalize the plot ----
    ax = self.ax if self.ax is not None else self.facets.axes.flat[0]
    stat = estimator.stat.capitalize()
    default_x = default_y = ""
    if self.data_variable == "x":
        default_y = stat
    if self.data_variable == "y":
        default_x = stat
    self._add_axis_labels(ax, default_x, default_y)

    if "hue" in self.variables and legend:
        legend_artist = partial(mpl.lines.Line2D, [], [])
        alpha = plot_kws.get("alpha", 1)
        legend_target = self.ax if self.ax is not None else self.facets
        self._add_legend(
            legend_target, legend_artist, False, False, None, alpha,
            plot_kws, {},
        )
def plot_rug(self, height, expand_margins, legend, **kws):
    """Draw rug ticks along each assigned data axis for every subset.

    ``height`` is the tick length passed to ``_plot_single_rug``; when
    ``expand_margins`` is true the axes margins perpendicular to each rug
    grow by twice that height. ``kws`` are forwarded to the rug artist.
    """
    # Keyword clean-up is idempotent, so it is done once up front
    kws.setdefault("linewidth", 1)
    if "hue" in self.variables:
        # Colors come from the hue mapping, not from the caller
        kws.pop("c", None)
        kws.pop("color", None)

    for sub_vars, sub_data in self.iter_data(from_comp_data=True):

        ax = self._get_axes(sub_vars)

        if expand_margins:
            xmarg, ymarg = ax.margins()
            if "x" in self.variables:
                ymarg += height * 2
            if "y" in self.variables:
                xmarg += height * 2
            ax.margins(x=xmarg, y=ymarg)

        for axis_var in ("x", "y"):
            if axis_var in self.variables:
                self._plot_single_rug(sub_data, axis_var, height, ax, kws)

    # --- Finalize the plot
    self._add_axis_labels(ax)
    if "hue" in self.variables and legend:
        # TODO ideally i'd like the legend artist to look like a rug
        legend_artist = partial(mpl.lines.Line2D, [], [])
        self._add_legend(
            ax, legend_artist, False, False, None, 1, {}, {},
        )
def _plot_single_rug(self, sub_data, var, height, ax, kws):
    """Draw a rugplot along one axis of the plot.

    ``var`` is "x" or "y"; ticks span from 0 to ``height`` in axes
    coordinates on the other dimension and are added to ``ax`` as a single
    ``LineCollection`` that receives ``kws``.
    """
    values = sub_data[var]
    n = len(values)

    # Return data to linear domain
    _, inv = _get_transform_functions(ax, var)
    values = inv(values)

    # A single collection is always added, with per-tick colors under hue
    colors = self._hue_map(sub_data["hue"]) if "hue" in self.variables else None

    # Each tick is two points: the data value repeated, paired with the
    # (0, height) extent in axes coordinates
    if var == "x":
        trans = tx.blended_transform_factory(ax.transData, ax.transAxes)
        xy_pairs = np.column_stack([
            np.repeat(values, 2), np.tile([0, height], n)
        ])
    if var == "y":
        trans = tx.blended_transform_factory(ax.transAxes, ax.transData)
        xy_pairs = np.column_stack([
            np.tile([0, height], n), np.repeat(values, 2)
        ])

    # Draw the lines on the plot
    segments = xy_pairs.reshape([n, 2, 2])
    rug = LineCollection(segments, transform=trans, colors=colors, **kws)
    ax.add_collection(rug)

    # Only rescale the axis that carries the data values
    ax.autoscale_view(scalex=var == "x", scaley=var == "y")
- # ==================================================================================== #
- # External API
- # ==================================================================================== #
def histplot(
    data=None, *,
    # Vector variables
    x=None, y=None, hue=None, weights=None,
    # Histogram computation parameters
    stat="count", bins="auto", binwidth=None, binrange=None,
    discrete=None, cumulative=False, common_bins=True, common_norm=True,
    # Histogram appearance parameters
    multiple="layer", element="bars", fill=True, shrink=1,
    # Histogram smoothing with a kernel density estimate
    kde=False, kde_kws=None, line_kws=None,
    # Bivariate histogram parameters
    thresh=0, pthresh=None, pmax=None, cbar=False, cbar_ax=None, cbar_kws=None,
    # Hue mapping parameters
    palette=None, hue_order=None, hue_norm=None, color=None,
    # Axes information
    log_scale=None, legend=True, ax=None,
    # Other appearance keywords
    **kwargs,
):
    # The user-facing docstring is attached after the definition through
    # ``histplot.__doc__``.

    plotter = _DistributionPlotter(
        data=data,
        variables={"x": x, "y": y, "hue": hue, "weights": weights},
    )
    plotter.map_hue(palette=palette, order=hue_order, norm=hue_norm)

    if ax is None:
        ax = plt.gca()
    plotter._attach(ax, log_scale=log_scale)

    # Resolve the default color against the matplotlib method that will
    # draw the artists. Note, bivariate plots won't cycle.
    if plotter.univariate:
        if not fill:
            draw_method = ax.plot
        elif element == "bars":
            draw_method = ax.bar
        else:
            draw_method = ax.fill_between
        color = _default_color(draw_method, hue, color, kwargs)

    # Nothing to draw without x/y data
    if not plotter.has_xy_data:
        return ax

    # Default to discrete bins for categorical variables
    if discrete is None:
        discrete = plotter._default_discrete()

    estimate_kws = {
        "stat": stat,
        "bins": bins,
        "binwidth": binwidth,
        "binrange": binrange,
        "discrete": discrete,
        "cumulative": cumulative,
    }

    if not plotter.univariate:
        plotter.plot_bivariate_histogram(
            common_bins=common_bins,
            common_norm=common_norm,
            thresh=thresh,
            pthresh=pthresh,
            pmax=pmax,
            color=color,
            legend=legend,
            cbar=cbar,
            cbar_ax=cbar_ax,
            cbar_kws=cbar_kws,
            estimate_kws=estimate_kws,
            **kwargs,
        )
        return ax

    plotter.plot_univariate_histogram(
        multiple=multiple,
        element=element,
        fill=fill,
        shrink=shrink,
        common_norm=common_norm,
        common_bins=common_bins,
        kde=kde,
        kde_kws=kde_kws,
        color=color,
        legend=legend,
        estimate_kws=estimate_kws,
        line_kws=line_kws,
        **kwargs,
    )
    return ax
- histplot.__doc__ = """\
- Plot univariate or bivariate histograms to show distributions of datasets.
- A histogram is a classic visualization tool that represents the distribution
- of one or more variables by counting the number of observations that fall within
- discrete bins.
- This function can normalize the statistic computed within each bin to estimate
- frequency, density or probability mass, and it can add a smooth curve obtained
- using a kernel density estimate, similar to :func:`kdeplot`.
- More information is provided in the :ref:`user guide <tutorial_hist>`.
- Parameters
- ----------
- {params.core.data}
- {params.core.xy}
- {params.core.hue}
- weights : vector or key in ``data``
- If provided, weight the contribution of the corresponding data points
- towards the count in each bin by these factors.
- {params.hist.stat}
- {params.hist.bins}
- {params.hist.binwidth}
- {params.hist.binrange}
- discrete : bool
- If True, default to ``binwidth=1`` and draw the bars so that they are
- centered on their corresponding data points. This avoids "gaps" that may
- otherwise appear when using discrete (integer) data.
- cumulative : bool
- If True, plot the cumulative counts as bins increase.
- common_bins : bool
- If True, use the same bins when semantic variables produce multiple
- plots. If using a reference rule to determine the bins, it will be computed
- with the full dataset.
- common_norm : bool
- If True and using a normalized statistic, the normalization will apply over
- the full dataset. Otherwise, normalize each histogram independently.
- multiple : {{"layer", "dodge", "stack", "fill"}}
- Approach to resolving multiple elements when semantic mapping creates subsets.
- Only relevant with univariate data.
- element : {{"bars", "step", "poly"}}
- Visual representation of the histogram statistic.
- Only relevant with univariate data.
- fill : bool
- If True, fill in the space under the histogram.
- Only relevant with univariate data.
- shrink : number
- Scale the width of each bar relative to the binwidth by this factor.
- Only relevant with univariate data.
- kde : bool
- If True, compute a kernel density estimate to smooth the distribution
- and show on the plot as (one or more) line(s).
- Only relevant with univariate data.
- kde_kws : dict
- Parameters that control the KDE computation, as in :func:`kdeplot`.
- line_kws : dict
- Parameters that control the KDE visualization, passed to
- :meth:`matplotlib.axes.Axes.plot`.
- thresh : number or None
- Cells with a statistic less than or equal to this value will be transparent.
- Only relevant with bivariate data.
- pthresh : number or None
- Like ``thresh``, but a value in [0, 1] such that cells with aggregate counts
- (or other statistics, when used) up to this proportion of the total will be
- transparent.
- pmax : number or None
- A value in [0, 1] that sets the saturation point for the colormap at a value
- such that cells below constitute this proportion of the total count (or
- other statistic, when used).
- {params.dist.cbar}
- {params.dist.cbar_ax}
- {params.dist.cbar_kws}
- {params.core.palette}
- {params.core.hue_order}
- {params.core.hue_norm}
- {params.core.color}
- {params.dist.log_scale}
- {params.dist.legend}
- {params.core.ax}
- kwargs
- Other keyword arguments are passed to one of the following matplotlib
- functions:
- - :meth:`matplotlib.axes.Axes.bar` (univariate, element="bars")
- - :meth:`matplotlib.axes.Axes.fill_between` (univariate, other element, fill=True)
- - :meth:`matplotlib.axes.Axes.plot` (univariate, other element, fill=False)
- - :meth:`matplotlib.axes.Axes.pcolormesh` (bivariate)
- Returns
- -------
- {returns.ax}
- See Also
- --------
- {seealso.displot}
- {seealso.kdeplot}
- {seealso.rugplot}
- {seealso.ecdfplot}
- {seealso.jointplot}
- Notes
- -----
- The choice of bins for computing and plotting a histogram can exert
- substantial influence on the insights that one is able to draw from the
- visualization. If the bins are too large, they may erase important features.
- On the other hand, bins that are too small may be dominated by random
- variability, obscuring the shape of the true underlying distribution. The
- default bin size is determined using a reference rule that depends on the
- sample size and variance. This works well in many cases, (i.e., with
- "well-behaved" data) but it fails in others. It is always a good idea to try
- different bin sizes to be sure that you are not missing something important.
- This function allows you to specify bins in several different ways, such as
- by setting the total number of bins to use, the width of each bin, or the
- specific locations where the bins should break.
- Examples
- --------
- .. include:: ../docstrings/histplot.rst
- """.format(
- params=_param_docs,
- returns=_core_docs["returns"],
- seealso=_core_docs["seealso"],
- )
def kdeplot(
    data=None, *, x=None, y=None, hue=None, weights=None,
    palette=None, hue_order=None, hue_norm=None, color=None, fill=None,
    multiple="layer", common_norm=True, common_grid=False, cumulative=False,
    bw_method="scott", bw_adjust=1, warn_singular=True, log_scale=None,
    levels=10, thresh=.05, gridsize=200, cut=3, clip=None,
    legend=True, cbar=False, cbar_ax=None, cbar_kws=None, ax=None,
    **kwargs,
):
    # The user-facing docstring is attached after the definition through
    # ``kdeplot.__doc__``.
    #
    # Raises TypeError when the removed ``data2`` keyword is passed. The
    # other legacy keywords handled below (``vertical``, ``bw``, ``kernel``,
    # ``shade_lowest``, ``shade``) are translated and emit a warning.

    # --- Start with backwards compatibility for versions < 0.11.0 ----------------

    # Handle (past) deprecation of `data2`
    if "data2" in kwargs:
        msg = "`data2` has been removed (replaced by `y`); please update your code."
        # The exception must actually be raised; constructing it alone would
        # let `data2` leak through to the matplotlib call further down.
        raise TypeError(msg)

    # Handle deprecation of `vertical`
    vertical = kwargs.pop("vertical", None)
    if vertical is not None:
        if vertical:
            action_taken = "assigning data to `y`."
            if x is None:
                data, y = y, data
            else:
                x, y = y, x
        else:
            action_taken = "assigning data to `x`."
        msg = textwrap.dedent(f"""\n
        The `vertical` parameter is deprecated; {action_taken}
        This will become an error in seaborn v0.14.0; please update your code.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # Handle deprecation of `bw`
    bw = kwargs.pop("bw", None)
    if bw is not None:
        msg = textwrap.dedent(f"""\n
        The `bw` parameter is deprecated in favor of `bw_method` and `bw_adjust`.
        Setting `bw_method={bw}`, but please see the docs for the new parameters
        and update your code. This will become an error in seaborn v0.14.0.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)
        bw_method = bw

    # Handle deprecation of `kernel`
    if kwargs.pop("kernel", None) is not None:
        msg = textwrap.dedent("""\n
        Support for alternate kernels has been removed; using Gaussian kernel.
        This will become an error in seaborn v0.14.0; please update your code.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # Handle deprecation of shade_lowest
    shade_lowest = kwargs.pop("shade_lowest", None)
    if shade_lowest is not None:
        if shade_lowest:
            thresh = 0
        msg = textwrap.dedent(f"""\n
        `shade_lowest` has been replaced by `thresh`; setting `thresh={thresh}`.
        This will become an error in seaborn v0.14.0; please update your code.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # Handle "soft" deprecation of shade. `shade` is not really the right
    # terminology here, but unlike some of the other deprecated parameters it
    # is probably very commonly used and much harder to remove. This is
    # therefore going to be a longer process where, first, `fill` will be
    # introduced and be used throughout the documentation. In 0.12, when
    # kwarg-only enforcement hits, we can remove the shade/shade_lowest out of
    # the function signature all together and pull them out of the kwargs.
    # Then we can actually fire a FutureWarning, and eventually remove.
    shade = kwargs.pop("shade", None)
    if shade is not None:
        fill = shade
        msg = textwrap.dedent(f"""\n
        `shade` is now deprecated in favor of `fill`; setting `fill={shade}`.
        This will become an error in seaborn v0.14.0; please update your code.
        """)
        warnings.warn(msg, FutureWarning, stacklevel=2)

    # Handle `n_levels`
    # This was never in the formal API but it was processed, and appeared in an
    # example. We can treat as an alias for `levels` now and deprecate later.
    levels = kwargs.pop("n_levels", levels)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

    p = _DistributionPlotter(
        data=data,
        variables=dict(x=x, y=y, hue=hue, weights=weights),
    )

    p.map_hue(palette=palette, order=hue_order, norm=hue_norm)

    if ax is None:
        ax = plt.gca()

    p._attach(ax, allowed_types=["numeric", "datetime"], log_scale=log_scale)

    # Resolve the default color against the method that draws the artists
    method = ax.fill_between if fill else ax.plot
    color = _default_color(method, hue, color, kwargs)

    # Nothing to draw without x/y data
    if not p.has_xy_data:
        return ax

    # Pack the kwargs for statistics.KDE
    estimate_kws = dict(
        bw_method=bw_method,
        bw_adjust=bw_adjust,
        gridsize=gridsize,
        cut=cut,
        clip=clip,
        cumulative=cumulative,
    )

    if p.univariate:

        plot_kws = kwargs.copy()

        p.plot_univariate_density(
            multiple=multiple,
            common_norm=common_norm,
            common_grid=common_grid,
            fill=fill,
            color=color,
            legend=legend,
            warn_singular=warn_singular,
            estimate_kws=estimate_kws,
            **plot_kws,
        )

    else:

        p.plot_bivariate_density(
            common_norm=common_norm,
            fill=fill,
            levels=levels,
            thresh=thresh,
            legend=legend,
            color=color,
            warn_singular=warn_singular,
            cbar=cbar,
            cbar_ax=cbar_ax,
            cbar_kws=cbar_kws,
            estimate_kws=estimate_kws,
            **kwargs,
        )

    return ax
- kdeplot.__doc__ = """\
- Plot univariate or bivariate distributions using kernel density estimation.
- A kernel density estimate (KDE) plot is a method for visualizing the
- distribution of observations in a dataset, analogous to a histogram. KDE
- represents the data using a continuous probability density curve in one or
- more dimensions.
- The approach is explained further in the :ref:`user guide <tutorial_kde>`.
- Relative to a histogram, KDE can produce a plot that is less cluttered and
- more interpretable, especially when drawing multiple distributions. But it
- has the potential to introduce distortions if the underlying distribution is
- bounded or not smooth. Like a histogram, the quality of the representation
- also depends on the selection of good smoothing parameters.
- Parameters
- ----------
- {params.core.data}
- {params.core.xy}
- {params.core.hue}
- weights : vector or key in ``data``
- If provided, weight the kernel density estimation using these values.
- {params.core.palette}
- {params.core.hue_order}
- {params.core.hue_norm}
- {params.core.color}
- fill : bool or None
- If True, fill in the area under univariate density curves or between
- bivariate contours. If None, the default depends on ``multiple``.
- {params.dist.multiple}
- common_norm : bool
- If True, scale each conditional density by the number of observations
- such that the total area under all densities sums to 1. Otherwise,
- normalize each density independently.
- common_grid : bool
- If True, use the same evaluation grid for each kernel density estimate.
- Only relevant with univariate data.
- {params.kde.cumulative}
- {params.kde.bw_method}
- {params.kde.bw_adjust}
- warn_singular : bool
- If True, issue a warning when trying to estimate the density of data
- with zero variance.
- {params.dist.log_scale}
- levels : int or vector
- Number of contour levels or values to draw contours at. A vector argument
- must have increasing values in [0, 1]. Levels correspond to iso-proportions
- of the density: e.g., 20% of the probability mass will lie below the
- contour drawn for 0.2. Only relevant with bivariate data.
- thresh : number in [0, 1]
- Lowest iso-proportion level at which to draw a contour line. Ignored when
- ``levels`` is a vector. Only relevant with bivariate data.
- gridsize : int
- Number of points on each dimension of the evaluation grid.
- {params.kde.cut}
- {params.kde.clip}
- {params.dist.legend}
- {params.dist.cbar}
- {params.dist.cbar_ax}
- {params.dist.cbar_kws}
- {params.core.ax}
- kwargs
- Other keyword arguments are passed to one of the following matplotlib
- functions:
- - :meth:`matplotlib.axes.Axes.plot` (univariate, ``fill=False``),
- - :meth:`matplotlib.axes.Axes.fill_between` (univariate, ``fill=True``),
- - :meth:`matplotlib.axes.Axes.contour` (bivariate, ``fill=False``),
- - :meth:`matplotlib.axes.Axes.contourf` (bivariate, ``fill=True``).
- Returns
- -------
- {returns.ax}
- See Also
- --------
- {seealso.displot}
- {seealso.histplot}
- {seealso.ecdfplot}
- {seealso.jointplot}
- {seealso.violinplot}
- Notes
- -----
- The *bandwidth*, or standard deviation of the smoothing kernel, is an
- important parameter. Misspecification of the bandwidth can produce a
- distorted representation of the data. Much like the choice of bin width in a
- histogram, an over-smoothed curve can erase true features of a
- distribution, while an under-smoothed curve can create false features out of
- random variability. The rule-of-thumb that sets the default bandwidth works
- best when the true distribution is smooth, unimodal, and roughly bell-shaped.
- It is always a good idea to check the default behavior by using ``bw_adjust``
- to increase or decrease the amount of smoothing.
- Because the smoothing algorithm uses a Gaussian kernel, the estimated density
- curve can extend to values that do not make sense for a particular dataset.
- For example, the curve may be drawn over negative values when smoothing data
- that are naturally positive. The ``cut`` and ``clip`` parameters can be used
- to control the extent of the curve, but datasets that have many observations
- close to a natural boundary may be better served by a different visualization
- method.
- Similar considerations apply when a dataset is naturally discrete or "spiky"
- (containing many repeated observations of the same value). Kernel density
- estimation will always produce a smooth curve, which would be misleading
- in these situations.
- The units on the density axis are a common source of confusion. While kernel
- density estimation produces a probability distribution, the height of the curve
- at each point gives a density, not a probability. A probability can be obtained
- only by integrating the density across a range. The curve is normalized so
- that the integral over all possible values is 1, meaning that the scale of
- the density axis depends on the data values.
- Examples
- --------
- .. include:: ../docstrings/kdeplot.rst
- """.format(
- params=_param_docs,
- returns=_core_docs["returns"],
- seealso=_core_docs["seealso"],
- )
def ecdfplot(
    data=None, *,
    # Vector variables
    x=None, y=None, hue=None, weights=None,
    # Computation parameters
    stat="proportion", complementary=False,
    # Hue mapping parameters
    palette=None, hue_order=None, hue_norm=None,
    # Axes information
    log_scale=None, legend=True, ax=None,
    # Other appearance keywords
    **kwargs,
):
    # NOTE: the user-facing docstring is assigned to ``ecdfplot.__doc__``
    # right after this definition.

    plotter = _DistributionPlotter(
        data=data,
        variables={"x": x, "y": y, "hue": hue, "weights": weights},
    )
    plotter.map_hue(palette=palette, order=hue_order, norm=hue_norm)

    # Other semantics (size, style) could be supported here fairly easily,
    # but that would make distplot a bit more complicated. Features like that
    # can always be added later, so they are deferred; it will be even easier
    # once there is a more general/abstract way to go from semantic specs to
    # artist attributes.

    # Draw on the current axes unless the caller supplied one
    target_ax = plt.gca() if ax is None else ax
    plotter._attach(target_ax, log_scale=log_scale)

    # Both the long and the short spelling are consumed from kwargs;
    # ``color`` wins over ``c`` when both are given.
    short_color = kwargs.pop("c", None)
    requested_color = kwargs.pop("color", short_color)
    kwargs["color"] = _default_color(target_ax.plot, hue, requested_color, kwargs)

    if plotter.has_xy_data:

        # Bivariate support could be added one day, but it's of dubious value
        if not plotter.univariate:
            raise NotImplementedError("Bivariate ECDF plots are not implemented")

        plotter.plot_univariate_ecdf(
            estimate_kws={"stat": stat, "complementary": complementary},
            legend=legend,
            **kwargs,
        )

    # With no x/y data there is nothing to draw; the axes is returned either way
    return target_ax
# The docstring is attached after the definition so that the shared parameter,
# return, and see-also descriptions held in ``_param_docs`` / ``_core_docs``
# can be interpolated with ``str.format``.
ecdfplot.__doc__ = """\
Plot empirical cumulative distribution functions.

An ECDF represents the proportion or count of observations falling below each
unique value in a dataset. Compared to a histogram or density plot, it has the
advantage that each observation is visualized directly, meaning that there are
no binning or smoothing parameters that need to be adjusted. It also aids direct
comparisons between multiple distributions. A downside is that the relationship
between the appearance of the plot and the basic properties of the distribution
(such as its central tendency, variance, and the presence of any bimodality)
may not be as intuitive.

More information is provided in the :ref:`user guide <tutorial_ecdf>`.

Parameters
----------
{params.core.data}
{params.core.xy}
{params.core.hue}
weights : vector or key in ``data``
    If provided, weight the contribution of the corresponding data points
    towards the cumulative distribution using these values.
{params.ecdf.stat}
{params.ecdf.complementary}
{params.core.palette}
{params.core.hue_order}
{params.core.hue_norm}
{params.dist.log_scale}
{params.dist.legend}
{params.core.ax}
kwargs
    Other keyword arguments are passed to :meth:`matplotlib.axes.Axes.plot`.

Returns
-------
{returns.ax}

See Also
--------
{seealso.displot}
{seealso.histplot}
{seealso.kdeplot}
{seealso.rugplot}

Examples
--------

.. include:: ../docstrings/ecdfplot.rst

""".format(
    params=_param_docs,
    returns=_core_docs["returns"],
    seealso=_core_docs["seealso"],
)
def rugplot(
    data=None, *, x=None, y=None, hue=None, height=.025, expand_margins=True,
    palette=None, hue_order=None, hue_norm=None, legend=True, ax=None, **kwargs
):
    # NOTE: the user-facing docstring is assigned to ``rugplot.__doc__``
    # right after this definition.

    # Design notes:
    # - It would make sense to add multiple= to rugplot and allow rugs for
    #   different hue variables to be shifted orthogonal to the data axis.
    #   But is this stacking, or dodging?
    # - If a style semantic is ever wanted, there could be an option that
    #   draws the rug using scatterplot.
    # - It would also be nice to offer some kind of histogram/density rugplot,
    #   since alpha blending doesn't work great in the large n regime.

    # --- Start with backwards compatibility for versions < 0.11.0 ------------

    legacy_a = kwargs.pop("a", None)
    legacy_axis = kwargs.pop("axis", None)

    # Old positional-data keyword: treat it as ``data``
    if legacy_a is not None:
        data = legacy_a
        msg = textwrap.dedent("""\n
        The `a` parameter has been replaced; use `x`, `y`, and/or `data` instead.
        Please update your code; This will become an error in seaborn v0.14.0.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # Old orientation keyword: move ``data`` onto the named variable
    if legacy_axis is not None:
        if legacy_axis == "x":
            x = data
        elif legacy_axis == "y":
            y = data
        data = None
        msg = textwrap.dedent(f"""\n
        The `axis` parameter has been deprecated; use the `{legacy_axis}` parameter instead.
        Please update your code; this will become an error in seaborn v0.14.0.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # Old boolean orientation flag
    legacy_vertical = kwargs.pop("vertical", None)
    if legacy_vertical is not None:
        if not legacy_vertical:
            action_taken = "assigning data to `x`."
        else:
            action_taken = "assigning data to `y`."
            if x is None:
                data, y = y, data
            else:
                x, y = y, x
        msg = textwrap.dedent(f"""\n
        The `vertical` parameter is deprecated; {action_taken}
        This will become an error in seaborn v0.14.0; please update your code.
        """)
        warnings.warn(msg, UserWarning, stacklevel=2)

    # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - #

    plotter = _DistributionPlotter(
        data=data,
        variables={"x": x, "y": y, "hue": hue},
    )
    plotter.map_hue(palette=palette, order=hue_order, norm=hue_norm)

    # Draw on the current axes unless the caller supplied one
    target_ax = plt.gca() if ax is None else ax
    plotter._attach(target_ax)

    # ``color`` takes precedence over its short alias ``c``; both are consumed
    short_color = kwargs.pop("c", None)
    requested_color = kwargs.pop("color", short_color)
    kwargs["color"] = _default_color(target_ax.plot, hue, requested_color, kwargs)

    if plotter.has_xy_data:
        plotter.plot_rug(height, expand_margins, legend, **kwargs)

    return target_ax
# The docstring is attached after the definition so that the shared parameter
# and return descriptions held in ``_param_docs`` / ``_core_docs`` can be
# interpolated with ``str.format``.
rugplot.__doc__ = """\
Plot marginal distributions by drawing ticks along the x and y axes.

This function is intended to complement other plots by showing the location
of individual observations in an unobtrusive way.

Parameters
----------
{params.core.data}
{params.core.xy}
{params.core.hue}
height : float
    Proportion of axes extent covered by each rug element. Can be negative.
expand_margins : bool
    If True, increase the axes margins by the height of the rug to avoid
    overlap with other elements.
{params.core.palette}
{params.core.hue_order}
{params.core.hue_norm}
legend : bool
    If False, do not add a legend for semantic variables.
{params.core.ax}
kwargs
    Other keyword arguments are passed to
    :class:`matplotlib.collections.LineCollection`.

Returns
-------
{returns.ax}

Examples
--------

.. include:: ../docstrings/rugplot.rst

""".format(
    params=_param_docs,
    returns=_core_docs["returns"],
)
def displot(
    data=None, *,
    # Vector variables
    x=None, y=None, hue=None, row=None, col=None, weights=None,
    # Other plot parameters
    kind="hist", rug=False, rug_kws=None, log_scale=None, legend=True,
    # Hue-mapping parameters
    palette=None, hue_order=None, hue_norm=None, color=None,
    # Faceting parameters
    col_wrap=None, row_order=None, col_order=None,
    height=5, aspect=1, facet_kws=None,
    **kwargs,
):
    # NOTE: the user-facing docstring is assigned to ``displot.__doc__``
    # right after this definition.
    #
    # Outline: build the plotter, build a FacetGrid from its data, dispatch to
    # the hist/kde/ecdf drawing method selected by ``kind``, optionally add a
    # rug, then label the grid and store a user-friendly frame on ``g.data``.

    p = _DistributionPlotter(
        data=data,
        variables=dict(x=x, y=y, hue=hue, weights=weights, row=row, col=col),
    )

    p.map_hue(palette=palette, order=hue_order, norm=hue_norm)

    _check_argument("kind", ["hist", "kde", "ecdf"], kind)

    # --- Initialize the FacetGrid object

    # Check for attempt to plot onto specific axes and warn
    if "ax" in kwargs:
        msg = (
            "`displot` is a figure-level function and does not accept "
            "the ax= parameter. You may wish to try {}plot.".format(kind)
        )
        # stacklevel=2 attributes the warning to the caller, matching the
        # other warnings issued in this module
        warnings.warn(msg, UserWarning, stacklevel=2)
        kwargs.pop("ax")

    for var in ["row", "col"]:
        # Handle faceting variables that lack name information
        if var in p.variables and p.variables[var] is None:
            p.variables[var] = f"_{var}_"

    # Adapt the plot_data dataframe for use with FacetGrid
    grid_data = p.plot_data.rename(columns=p.variables)
    # Renaming can produce repeated column names; keep the first of each
    grid_data = grid_data.loc[:, ~grid_data.columns.duplicated()]

    col_name = p.variables.get("col")
    row_name = p.variables.get("row")

    if facet_kws is None:
        facet_kws = {}

    g = FacetGrid(
        data=grid_data, row=row_name, col=col_name,
        col_wrap=col_wrap, row_order=row_order,
        col_order=col_order, height=height,
        aspect=aspect,
        **facet_kws,
    )

    # Now attach the axes object to the plotter object
    if kind == "kde":
        allowed_types = ["numeric", "datetime"]
    else:
        allowed_types = None
    p._attach(g, allowed_types=allowed_types, log_scale=log_scale)

    # Check for a specification that lacks x/y data and return early
    if not p.has_xy_data:
        return g

    if color is None and hue is None:
        color = "C0"
    # XXX else warn if hue is not None?

    kwargs["legend"] = legend

    # --- Draw the plots

    if kind == "hist":

        hist_kws = kwargs.copy()

        # Extract the parameters that will go directly to Histogram
        estimate_defaults = {}
        _assign_default_kwargs(estimate_defaults, Histogram.__init__, histplot)

        estimate_kws = {}
        for key, default_val in estimate_defaults.items():
            estimate_kws[key] = hist_kws.pop(key, default_val)

        # Handle derivative defaults
        if estimate_kws["discrete"] is None:
            estimate_kws["discrete"] = p._default_discrete()

        hist_kws["estimate_kws"] = estimate_kws

        hist_kws.setdefault("color", color)

        if p.univariate:

            _assign_default_kwargs(hist_kws, p.plot_univariate_histogram, histplot)
            p.plot_univariate_histogram(**hist_kws)

        else:

            _assign_default_kwargs(hist_kws, p.plot_bivariate_histogram, histplot)
            p.plot_bivariate_histogram(**hist_kws)

    elif kind == "kde":

        kde_kws = kwargs.copy()

        # Extract the parameters that will go directly to KDE
        estimate_defaults = {}
        _assign_default_kwargs(estimate_defaults, KDE.__init__, kdeplot)

        estimate_kws = {}
        for key, default_val in estimate_defaults.items():
            estimate_kws[key] = kde_kws.pop(key, default_val)

        kde_kws["estimate_kws"] = estimate_kws
        kde_kws["color"] = color

        if p.univariate:

            _assign_default_kwargs(kde_kws, p.plot_univariate_density, kdeplot)
            p.plot_univariate_density(**kde_kws)

        else:

            _assign_default_kwargs(kde_kws, p.plot_bivariate_density, kdeplot)
            p.plot_bivariate_density(**kde_kws)

    elif kind == "ecdf":

        ecdf_kws = kwargs.copy()

        # Extract the parameters that will go directly to the estimator
        estimate_kws = {}
        estimate_defaults = {}
        _assign_default_kwargs(estimate_defaults, ECDF.__init__, ecdfplot)
        for key, default_val in estimate_defaults.items():
            estimate_kws[key] = ecdf_kws.pop(key, default_val)

        ecdf_kws["estimate_kws"] = estimate_kws
        ecdf_kws["color"] = color

        if p.univariate:

            _assign_default_kwargs(ecdf_kws, p.plot_univariate_ecdf, ecdfplot)
            p.plot_univariate_ecdf(**ecdf_kws)

        else:

            raise NotImplementedError("Bivariate ECDF plots are not implemented")

    # All plot kinds can include a rug
    if rug:
        # TODO with expand_margins=True, each facet expands margins... annoying!
        # Work on a copy: the defaults and overrides written below must not
        # leak into a dict owned by the caller.
        rug_kws = {} if rug_kws is None else dict(rug_kws)
        _assign_default_kwargs(rug_kws, p.plot_rug, rugplot)
        rug_kws["legend"] = False
        if color is not None:
            rug_kws["color"] = color
        p.plot_rug(**rug_kws)

    # Call FacetGrid annotation methods
    # Note that the legend is currently set inside the plotting method
    g.set_axis_labels(
        x_var=p.variables.get("x", g.axes.flat[0].get_xlabel()),
        y_var=p.variables.get("y", g.axes.flat[0].get_ylabel()),
    )
    g.set_titles()
    g.tight_layout()

    if data is not None and (x is not None or y is not None):
        # Long-form input: expose the caller's data plus any grid columns
        # that it does not already contain, aligned on the index
        if not isinstance(data, pd.DataFrame):
            data = pd.DataFrame(data)
        g.data = pd.merge(
            data,
            g.data[g.data.columns.difference(data.columns)],
            left_index=True,
            right_index=True,
        )
    else:
        # Otherwise expose the plotter's frame, substituting a placeholder
        # ``_<var>_`` name for every variable that has no name
        wide_cols = {
            k: f"_{k}_" if v is None else v for k, v in p.variables.items()
        }
        g.data = p.plot_data.rename(columns=wide_cols)

    return g
# The docstring is attached after the definition so that the shared parameter,
# return, and see-also descriptions held in ``_param_docs`` / ``_core_docs``
# can be interpolated with ``str.format``. Because the text goes through
# ``str.format``, literal braces are written doubled (``{{`` / ``}}``).
displot.__doc__ = """\
Figure-level interface for drawing distribution plots onto a FacetGrid.

This function provides access to several approaches for visualizing the
univariate or bivariate distribution of data, including subsets of data
defined by semantic mapping and faceting across multiple subplots. The
``kind`` parameter selects the approach to use:

- :func:`histplot` (with ``kind="hist"``; the default)
- :func:`kdeplot` (with ``kind="kde"``)
- :func:`ecdfplot` (with ``kind="ecdf"``; univariate-only)

Additionally, a :func:`rugplot` can be added to any kind of plot to show
individual observations.

Extra keyword arguments are passed to the underlying function, so you should
refer to the documentation for each to understand the complete set of options
for making plots with this interface.

See the :doc:`distribution plots tutorial <../tutorial/distributions>` for a more
in-depth discussion of the relative strengths and weaknesses of each approach.
The distinction between figure-level and axes-level functions is explained
further in the :doc:`user guide <../tutorial/function_overview>`.

Parameters
----------
{params.core.data}
{params.core.xy}
{params.core.hue}
{params.facets.rowcol}
weights : vector or key in ``data``
    Observation weights used for computing the distribution function.
kind : {{"hist", "kde", "ecdf"}}
    Approach for visualizing the data. Selects the underlying plotting function
    and determines the additional set of valid parameters.
rug : bool
    If True, show each observation with marginal ticks (as in :func:`rugplot`).
rug_kws : dict
    Parameters to control the appearance of the rug plot.
{params.dist.log_scale}
{params.dist.legend}
{params.core.palette}
{params.core.hue_order}
{params.core.hue_norm}
{params.core.color}
{params.facets.col_wrap}
{params.facets.rowcol_order}
{params.facets.height}
{params.facets.aspect}
{params.facets.facet_kws}
kwargs
    Other keyword arguments are documented with the relevant axes-level function:

    - :func:`histplot` (with ``kind="hist"``)
    - :func:`kdeplot` (with ``kind="kde"``)
    - :func:`ecdfplot` (with ``kind="ecdf"``)

Returns
-------
{returns.facetgrid}

See Also
--------
{seealso.histplot}
{seealso.kdeplot}
{seealso.rugplot}
{seealso.ecdfplot}
{seealso.jointplot}

Examples
--------

See the API documentation for the axes-level functions for more details
about the breadth of options available for each plot kind.

.. include:: ../docstrings/displot.rst

""".format(
    params=_param_docs,
    returns=_core_docs["returns"],
    seealso=_core_docs["seealso"],
)
- # =========================================================================== #
- # DEPRECATED FUNCTIONS LIVE BELOW HERE
- # =========================================================================== #
- def _freedman_diaconis_bins(a):
- """Calculate number of hist bins using Freedman-Diaconis rule."""
- # From https://stats.stackexchange.com/questions/798/
- a = np.asarray(a)
- if len(a) < 2:
- return 1
- iqr = np.subtract.reduce(np.nanpercentile(a, [75, 25]))
- h = 2 * iqr / (len(a) ** (1 / 3))
- # fall back to sqrt(a) bins if iqr is 0
- if h == 0:
- return int(np.sqrt(a.size))
- else:
- return int(np.ceil((a.max() - a.min()) / h))
def distplot(a=None, bins=None, hist=True, kde=True, rug=False, fit=None,
             hist_kws=None, kde_kws=None, rug_kws=None, fit_kws=None,
             color=None, vertical=False, norm_hist=False, axlabel=None,
             label=None, ax=None, x=None):
    """
    DEPRECATED

    This function has been deprecated and will be removed in seaborn v0.14.0.
    It has been replaced by :func:`histplot` and :func:`displot`, two functions
    with a modern API and many more capabilities.

    For a guide to updating, please see this notebook:

    https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751

    """

    # Point the user at the closest modern axes-level replacement
    if kde and not hist:
        axes_level_suggestion = (
            "`kdeplot` (an axes-level function for kernel density plots)"
        )
    else:
        axes_level_suggestion = (
            "`histplot` (an axes-level function for histograms)"
        )

    msg = textwrap.dedent(f"""

    `distplot` is a deprecated function and will be removed in seaborn v0.14.0.

    Please adapt your code to use either `displot` (a figure-level function with
    similar flexibility) or {axes_level_suggestion}.

    For a guide to updating your code to use the new functions, please see
    https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751
    """)
    warnings.warn(msg, UserWarning, stacklevel=2)

    if ax is None:
        ax = plt.gca()

    # Support new-style API. This is resolved before the label inference
    # below so that data passed as ``x=`` contributes its ``name`` too.
    if x is not None:
        a = x

    # Intelligently label the support axis
    label_ax = bool(axlabel)
    if axlabel is None and hasattr(a, "name"):
        axlabel = a.name
        if axlabel is not None:
            label_ax = True

    # Make a a 1-d float array
    a = np.asarray(a, float)
    if a.ndim > 1:
        a = a.squeeze()

    # Drop null values from array
    a = remove_na(a)

    # Decide if the hist is normed
    norm_hist = norm_hist or kde or (fit is not None)

    # Handle dictionary defaults (copies, so caller dicts are never modified)
    hist_kws = {} if hist_kws is None else hist_kws.copy()
    kde_kws = {} if kde_kws is None else kde_kws.copy()
    rug_kws = {} if rug_kws is None else rug_kws.copy()
    fit_kws = {} if fit_kws is None else fit_kws.copy()

    # Get the color from the current color cycle by drawing, then removing,
    # a throwaway line
    if color is None:
        if vertical:
            line, = ax.plot(0, a.mean())
        else:
            line, = ax.plot(a.mean(), 0)
        color = line.get_color()
        line.remove()

    # Plug the label into the right kwarg dictionary: the first enabled
    # element (hist, kde, rug, fit) receives it
    if label is not None:
        if hist:
            hist_kws["label"] = label
        elif kde:
            kde_kws["label"] = label
        elif rug:
            rug_kws["label"] = label
        elif fit is not None:
            fit_kws["label"] = label

    if hist:
        if bins is None:
            bins = min(_freedman_diaconis_bins(a), 50)
        hist_kws.setdefault("alpha", 0.4)
        hist_kws.setdefault("density", norm_hist)

        orientation = "horizontal" if vertical else "vertical"
        hist_color = hist_kws.pop("color", color)
        ax.hist(a, bins, orientation=orientation,
                color=hist_color, **hist_kws)

    # Name of the keyword that carries the data for kdeplot/rugplot
    axis = "y" if vertical else "x"

    if kde:
        kde_color = kde_kws.pop("color", color)
        kdeplot(**{axis: a}, ax=ax, color=kde_color, **kde_kws)

    if rug:
        rug_color = rug_kws.pop("color", color)
        rugplot(**{axis: a}, ax=ax, color=rug_color, **rug_kws)

    if fit is not None:

        fit_color = fit_kws.pop("color", "#282828")
        gridsize = fit_kws.pop("gridsize", 200)
        cut = fit_kws.pop("cut", 3)
        clip = fit_kws.pop("clip", (-np.inf, np.inf))

        # Evaluate the fitted pdf on a grid derived from a Scott's-rule bandwidth
        bw = gaussian_kde(a).scotts_factor() * a.std(ddof=1)
        support = _kde_support(a, bw, gridsize, cut, clip)
        params = fit.fit(a)
        density = fit.pdf(support, *params)

        if vertical:
            ax.plot(density, support, color=fit_color, **fit_kws)
        else:
            ax.plot(support, density, color=fit_color, **fit_kws)

    if label_ax:
        if vertical:
            ax.set_ylabel(axlabel)
        else:
            ax.set_xlabel(axlabel)

    return ax
|