_core.py 63 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864
  1. from __future__ import annotations
  2. import importlib
  3. import types
  4. from typing import (
  5. TYPE_CHECKING,
  6. Sequence,
  7. )
  8. from pandas._config import get_option
  9. from pandas._typing import IndexLabel
  10. from pandas.util._decorators import (
  11. Appender,
  12. Substitution,
  13. )
  14. from pandas.core.dtypes.common import (
  15. is_integer,
  16. is_list_like,
  17. )
  18. from pandas.core.dtypes.generic import (
  19. ABCDataFrame,
  20. ABCSeries,
  21. )
  22. from pandas.core.base import PandasObject
  23. if TYPE_CHECKING:
  24. from matplotlib.axes import Axes
  25. from pandas import DataFrame
  26. def hist_series(
  27. self,
  28. by=None,
  29. ax=None,
  30. grid: bool = True,
  31. xlabelsize: int | None = None,
  32. xrot: float | None = None,
  33. ylabelsize: int | None = None,
  34. yrot: float | None = None,
  35. figsize: tuple[int, int] | None = None,
  36. bins: int | Sequence[int] = 10,
  37. backend: str | None = None,
  38. legend: bool = False,
  39. **kwargs,
  40. ):
  41. """
  42. Draw histogram of the input series using matplotlib.
  43. Parameters
  44. ----------
  45. by : object, optional
  46. If passed, then used to form histograms for separate groups.
  47. ax : matplotlib axis object
  48. If not passed, uses gca().
  49. grid : bool, default True
  50. Whether to show axis grid lines.
  51. xlabelsize : int, default None
  52. If specified changes the x-axis label size.
  53. xrot : float, default None
  54. Rotation of x axis labels.
  55. ylabelsize : int, default None
  56. If specified changes the y-axis label size.
  57. yrot : float, default None
  58. Rotation of y axis labels.
  59. figsize : tuple, default None
  60. Figure size in inches by default.
  61. bins : int or sequence, default 10
  62. Number of histogram bins to be used. If an integer is given, bins + 1
  63. bin edges are calculated and returned. If bins is a sequence, gives
  64. bin edges, including left edge of first bin and right edge of last
  65. bin. In this case, bins is returned unmodified.
  66. backend : str, default None
  67. Backend to use instead of the backend specified in the option
  68. ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
  69. specify the ``plotting.backend`` for the whole session, set
  70. ``pd.options.plotting.backend``.
  71. legend : bool, default False
  72. Whether to show the legend.
  73. .. versionadded:: 1.1.0
  74. **kwargs
  75. To be passed to the actual plotting function.
  76. Returns
  77. -------
  78. matplotlib.AxesSubplot
  79. A histogram plot.
  80. See Also
  81. --------
  82. matplotlib.axes.Axes.hist : Plot a histogram using matplotlib.
  83. """
  84. plot_backend = _get_plot_backend(backend)
  85. return plot_backend.hist_series(
  86. self,
  87. by=by,
  88. ax=ax,
  89. grid=grid,
  90. xlabelsize=xlabelsize,
  91. xrot=xrot,
  92. ylabelsize=ylabelsize,
  93. yrot=yrot,
  94. figsize=figsize,
  95. bins=bins,
  96. legend=legend,
  97. **kwargs,
  98. )
  99. def hist_frame(
  100. data: DataFrame,
  101. column: IndexLabel = None,
  102. by=None,
  103. grid: bool = True,
  104. xlabelsize: int | None = None,
  105. xrot: float | None = None,
  106. ylabelsize: int | None = None,
  107. yrot: float | None = None,
  108. ax=None,
  109. sharex: bool = False,
  110. sharey: bool = False,
  111. figsize: tuple[int, int] | None = None,
  112. layout: tuple[int, int] | None = None,
  113. bins: int | Sequence[int] = 10,
  114. backend: str | None = None,
  115. legend: bool = False,
  116. **kwargs,
  117. ):
  118. """
  119. Make a histogram of the DataFrame's columns.
  120. A `histogram`_ is a representation of the distribution of data.
  121. This function calls :meth:`matplotlib.pyplot.hist`, on each series in
  122. the DataFrame, resulting in one histogram per column.
  123. .. _histogram: https://en.wikipedia.org/wiki/Histogram
  124. Parameters
  125. ----------
  126. data : DataFrame
  127. The pandas object holding the data.
  128. column : str or sequence, optional
  129. If passed, will be used to limit data to a subset of columns.
  130. by : object, optional
  131. If passed, then used to form histograms for separate groups.
  132. grid : bool, default True
  133. Whether to show axis grid lines.
  134. xlabelsize : int, default None
  135. If specified changes the x-axis label size.
  136. xrot : float, default None
  137. Rotation of x axis labels. For example, a value of 90 displays the
  138. x labels rotated 90 degrees clockwise.
  139. ylabelsize : int, default None
  140. If specified changes the y-axis label size.
  141. yrot : float, default None
  142. Rotation of y axis labels. For example, a value of 90 displays the
  143. y labels rotated 90 degrees clockwise.
  144. ax : Matplotlib axes object, default None
  145. The axes to plot the histogram on.
  146. sharex : bool, default True if ax is None else False
  147. In case subplots=True, share x axis and set some x axis labels to
  148. invisible; defaults to True if ax is None otherwise False if an ax
  149. is passed in.
  150. Note that passing in both an ax and sharex=True will alter all x axis
  151. labels for all subplots in a figure.
  152. sharey : bool, default False
  153. In case subplots=True, share y axis and set some y axis labels to
  154. invisible.
  155. figsize : tuple, optional
  156. The size in inches of the figure to create. Uses the value in
  157. `matplotlib.rcParams` by default.
  158. layout : tuple, optional
  159. Tuple of (rows, columns) for the layout of the histograms.
  160. bins : int or sequence, default 10
  161. Number of histogram bins to be used. If an integer is given, bins + 1
  162. bin edges are calculated and returned. If bins is a sequence, gives
  163. bin edges, including left edge of first bin and right edge of last
  164. bin. In this case, bins is returned unmodified.
  165. backend : str, default None
  166. Backend to use instead of the backend specified in the option
  167. ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
  168. specify the ``plotting.backend`` for the whole session, set
  169. ``pd.options.plotting.backend``.
  170. legend : bool, default False
  171. Whether to show the legend.
  172. .. versionadded:: 1.1.0
  173. **kwargs
  174. All other plotting keyword arguments to be passed to
  175. :meth:`matplotlib.pyplot.hist`.
  176. Returns
  177. -------
  178. matplotlib.AxesSubplot or numpy.ndarray of them
  179. See Also
  180. --------
  181. matplotlib.pyplot.hist : Plot a histogram using matplotlib.
  182. Examples
  183. --------
  184. This example draws a histogram based on the length and width of
  185. some animals, displayed in three bins
  186. .. plot::
  187. :context: close-figs
  188. >>> df = pd.DataFrame({
  189. ... 'length': [1.5, 0.5, 1.2, 0.9, 3],
  190. ... 'width': [0.7, 0.2, 0.15, 0.2, 1.1]
  191. ... }, index=['pig', 'rabbit', 'duck', 'chicken', 'horse'])
  192. >>> hist = df.hist(bins=3)
  193. """
  194. plot_backend = _get_plot_backend(backend)
  195. return plot_backend.hist_frame(
  196. data,
  197. column=column,
  198. by=by,
  199. grid=grid,
  200. xlabelsize=xlabelsize,
  201. xrot=xrot,
  202. ylabelsize=ylabelsize,
  203. yrot=yrot,
  204. ax=ax,
  205. sharex=sharex,
  206. sharey=sharey,
  207. figsize=figsize,
  208. layout=layout,
  209. legend=legend,
  210. bins=bins,
  211. **kwargs,
  212. )
# Docstring template shared by the box-plot entry points. It is attached to
# ``boxplot`` and ``boxplot_frame`` through ``@Appender``; the ``%(data)s``
# and ``%(backend)s`` placeholders are filled by ``@Substitution`` there.
# NOTE(review): leading whitespace and blank lines inside this literal were
# rebuilt in numpydoc layout because the extracted source had lost them --
# confirm against the upstream file before relying on the exact bytes.
_boxplot_doc = """
Make a box plot from DataFrame columns.

Make a box-and-whisker plot from DataFrame columns, optionally grouped
by some other columns. A box plot is a method for graphically depicting
groups of numerical data through their quartiles.
The box extends from the Q1 to Q3 quartile values of the data,
with a line at the median (Q2). The whiskers extend from the edges
of box to show the range of the data. By default, they extend no more than
`1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box, ending at the farthest
data point within that interval. Outliers are plotted as separate dots.

For further details see
Wikipedia's entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`_.

Parameters
----------
%(data)s\
column : str or list of str, optional
    Column name or list of names, or vector.
    Can be any valid input to :meth:`pandas.DataFrame.groupby`.
by : str or array-like, optional
    Column in the DataFrame to :meth:`pandas.DataFrame.groupby`.
    One box-plot will be done per value of columns in `by`.
ax : object of class matplotlib.axes.Axes, optional
    The matplotlib axes to be used by boxplot.
fontsize : float or str
    Tick label font size in points or as a string (e.g., `large`).
rot : float, default 0
    The rotation angle of labels (in degrees)
    with respect to the screen coordinate system.
grid : bool, default True
    Setting this to True will show the grid.
figsize : A tuple (width, height) in inches
    The size of the figure to create in matplotlib.
layout : tuple (rows, columns), optional
    For example, (3, 5) will display the subplots
    using 3 rows and 5 columns, starting from the top-left.
return_type : {'axes', 'dict', 'both'} or None, default 'axes'
    The kind of object to return. The default is ``axes``.

    * 'axes' returns the matplotlib axes the boxplot is drawn on.
    * 'dict' returns a dictionary whose values are the matplotlib
      Lines of the boxplot.
    * 'both' returns a namedtuple with the axes and dict.
    * when grouping with ``by``, a Series mapping columns to
      ``return_type`` is returned.

      If ``return_type`` is `None`, a NumPy array
      of axes with the same shape as ``layout`` is returned.
%(backend)s\

**kwargs
    All other plotting keyword arguments to be passed to
    :func:`matplotlib.pyplot.boxplot`.

Returns
-------
result
    See Notes.

See Also
--------
pandas.Series.plot.hist: Make a histogram.
matplotlib.pyplot.boxplot : Matplotlib equivalent plot.

Notes
-----
The return type depends on the `return_type` parameter:

* 'axes' : object of class matplotlib.axes.Axes
* 'dict' : dict of matplotlib.lines.Line2D objects
* 'both' : a namedtuple with structure (ax, lines)

For data grouped with ``by``, return a Series of the above or a numpy
array:

* :class:`~pandas.Series`
* :class:`~numpy.array` (for ``return_type = None``)

Use ``return_type='dict'`` when you want to tweak the appearance
of the lines after plotting. In this case a dict containing the Lines
making up the boxes, caps, fliers, medians, and whiskers is returned.

Examples
--------

Boxplots can be created for every column in the dataframe
by ``df.boxplot()`` or indicating the columns to be used:

.. plot::
    :context: close-figs

    >>> np.random.seed(1234)
    >>> df = pd.DataFrame(np.random.randn(10, 4),
    ...                   columns=['Col1', 'Col2', 'Col3', 'Col4'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) # doctest: +SKIP

Boxplots of variables distributions grouped by the values of a third
variable can be created using the option ``by``. For instance:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 2),
    ...                   columns=['Col1', 'Col2'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> boxplot = df.boxplot(by='X')

A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot
in order to group the data by combination of the variables in the x-axis:

.. plot::
    :context: close-figs

    >>> df = pd.DataFrame(np.random.randn(10, 3),
    ...                   columns=['Col1', 'Col2', 'Col3'])
    >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A',
    ...                      'B', 'B', 'B', 'B', 'B'])
    >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A',
    ...                      'B', 'A', 'B', 'A', 'B'])
    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y'])

The layout of boxplot can be adjusted giving a tuple to ``layout``:

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      layout=(2, 1))

Additional formatting can be done to the boxplot, like suppressing the grid
(``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``)
or changing the fontsize (i.e. ``fontsize=15``):

.. plot::
    :context: close-figs

    >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) # doctest: +SKIP

The parameter ``return_type`` can be used to select the type of element
returned by `boxplot`. When ``return_type='axes'`` is selected,
the matplotlib axes on which the boxplot is drawn are returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], return_type='axes')
    >>> type(boxplot)
    <class 'matplotlib.axes._subplots.AxesSubplot'>

When grouping with ``by``, a Series mapping columns to ``return_type``
is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type='axes')
    >>> type(boxplot)
    <class 'pandas.core.series.Series'>

If ``return_type`` is `None`, a NumPy array of axes with the same shape
as ``layout`` is returned:

    >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X',
    ...                      return_type=None)
    >>> type(boxplot)
    <class 'numpy.ndarray'>
"""
# numpydoc entry for the ``backend`` parameter. ``boxplot_frame`` passes it as
# the ``backend`` value of ``@Substitution`` so it lands in ``_boxplot_doc``.
# NOTE(review): the indentation of the description lines was rebuilt because
# the extracted source had lost it -- confirm against the upstream file.
_backend_doc = """\
backend : str, default None
    Backend to use instead of the backend specified in the option
    ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
    specify the ``plotting.backend`` for the whole session, set
    ``pd.options.plotting.backend``.
"""
# Docstring fragment (Parameters / Returns) containing ``%(kind)s``
# placeholders. Its consumers are not visible in this section of the file.
# NOTE(review): presumably appended to method docstrings, hence the 8-space
# base indentation; leading whitespace and blank lines were rebuilt because
# the extracted source had lost them -- confirm against the upstream file.
_bar_or_line_doc = """
        Parameters
        ----------
        x : label or position, optional
            Allows plotting of one column versus another. If not specified,
            the index of the DataFrame is used.
        y : label or position, optional
            Allows plotting of one column versus another. If not specified,
            all numerical columns are used.
        color : str, array-like, or dict, optional
            The color for each of the DataFrame's columns. Possible values are:

            - A single color string referred to by name, RGB or RGBA code,
                for instance 'red' or '#a98d19'.

            - A sequence of color strings referred to by name, RGB or RGBA
                code, which will be used for each column recursively. For
                instance ['green','yellow'] each column's %(kind)s will be filled in
                green or yellow, alternatively. If there is only a single column to
                be plotted, then only the first color from the color list will be
                used.

            - A dict of the form {column name : color}, so that each column will be
                colored accordingly. For example, if your columns are called `a` and
                `b`, then passing {'a': 'green', 'b': 'red'} will color %(kind)ss for
                column `a` in green and %(kind)ss for column `b` in red.

            .. versionadded:: 1.1.0

        **kwargs
            Additional keyword arguments are documented in
            :meth:`DataFrame.plot`.

        Returns
        -------
        matplotlib.axes.Axes or np.ndarray of them
            An ndarray is returned with one :class:`matplotlib.axes.Axes`
            per column when ``subplots=True``.
"""
  383. @Substitution(data="data : DataFrame\n The data to visualize.\n", backend="")
  384. @Appender(_boxplot_doc)
  385. def boxplot(
  386. data: DataFrame,
  387. column: str | list[str] | None = None,
  388. by: str | list[str] | None = None,
  389. ax: Axes | None = None,
  390. fontsize: float | str | None = None,
  391. rot: int = 0,
  392. grid: bool = True,
  393. figsize: tuple[float, float] | None = None,
  394. layout: tuple[int, int] | None = None,
  395. return_type: str | None = None,
  396. **kwargs,
  397. ):
  398. plot_backend = _get_plot_backend("matplotlib")
  399. return plot_backend.boxplot(
  400. data,
  401. column=column,
  402. by=by,
  403. ax=ax,
  404. fontsize=fontsize,
  405. rot=rot,
  406. grid=grid,
  407. figsize=figsize,
  408. layout=layout,
  409. return_type=return_type,
  410. **kwargs,
  411. )
  412. @Substitution(data="", backend=_backend_doc)
  413. @Appender(_boxplot_doc)
  414. def boxplot_frame(
  415. self,
  416. column=None,
  417. by=None,
  418. ax=None,
  419. fontsize=None,
  420. rot: int = 0,
  421. grid: bool = True,
  422. figsize=None,
  423. layout=None,
  424. return_type=None,
  425. backend=None,
  426. **kwargs,
  427. ):
  428. plot_backend = _get_plot_backend(backend)
  429. return plot_backend.boxplot_frame(
  430. self,
  431. column=column,
  432. by=by,
  433. ax=ax,
  434. fontsize=fontsize,
  435. rot=rot,
  436. grid=grid,
  437. figsize=figsize,
  438. layout=layout,
  439. return_type=return_type,
  440. **kwargs,
  441. )
  442. def boxplot_frame_groupby(
  443. grouped,
  444. subplots: bool = True,
  445. column=None,
  446. fontsize=None,
  447. rot: int = 0,
  448. grid: bool = True,
  449. ax=None,
  450. figsize=None,
  451. layout=None,
  452. sharex: bool = False,
  453. sharey: bool = True,
  454. backend=None,
  455. **kwargs,
  456. ):
  457. """
  458. Make box plots from DataFrameGroupBy data.
  459. Parameters
  460. ----------
  461. grouped : Grouped DataFrame
  462. subplots : bool
  463. * ``False`` - no subplots will be used
  464. * ``True`` - create a subplot for each group.
  465. column : column name or list of names, or vector
  466. Can be any valid input to groupby.
  467. fontsize : float or str
  468. rot : label rotation angle
  469. grid : Setting this to True will show the grid
  470. ax : Matplotlib axis object, default None
  471. figsize : A tuple (width, height) in inches
  472. layout : tuple (optional)
  473. The layout of the plot: (rows, columns).
  474. sharex : bool, default False
  475. Whether x-axes will be shared among subplots.
  476. sharey : bool, default True
  477. Whether y-axes will be shared among subplots.
  478. backend : str, default None
  479. Backend to use instead of the backend specified in the option
  480. ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
  481. specify the ``plotting.backend`` for the whole session, set
  482. ``pd.options.plotting.backend``.
  483. **kwargs
  484. All other plotting keyword arguments to be passed to
  485. matplotlib's boxplot function.
  486. Returns
  487. -------
  488. dict of key/value = group key/DataFrame.boxplot return value
  489. or DataFrame.boxplot return value in case subplots=figures=False
  490. Examples
  491. --------
  492. You can create boxplots for grouped data and show them as separate subplots:
  493. .. plot::
  494. :context: close-figs
  495. >>> import itertools
  496. >>> tuples = [t for t in itertools.product(range(1000), range(4))]
  497. >>> index = pd.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
  498. >>> data = np.random.randn(len(index),4)
  499. >>> df = pd.DataFrame(data, columns=list('ABCD'), index=index)
  500. >>> grouped = df.groupby(level='lvl1')
  501. >>> grouped.boxplot(rot=45, fontsize=12, figsize=(8,10)) # doctest: +SKIP
  502. The ``subplots=False`` option shows the boxplots in a single figure.
  503. .. plot::
  504. :context: close-figs
  505. >>> grouped.boxplot(subplots=False, rot=45, fontsize=12) # doctest: +SKIP
  506. """
  507. plot_backend = _get_plot_backend(backend)
  508. return plot_backend.boxplot_frame_groupby(
  509. grouped,
  510. subplots=subplots,
  511. column=column,
  512. fontsize=fontsize,
  513. rot=rot,
  514. grid=grid,
  515. ax=ax,
  516. figsize=figsize,
  517. layout=layout,
  518. sharex=sharex,
  519. sharey=sharey,
  520. **kwargs,
  521. )
  522. class PlotAccessor(PandasObject):
  523. """
  524. Make plots of Series or DataFrame.
  525. Uses the backend specified by the
  526. option ``plotting.backend``. By default, matplotlib is used.
  527. Parameters
  528. ----------
  529. data : Series or DataFrame
  530. The object for which the method is called.
  531. x : label or position, default None
  532. Only used if data is a DataFrame.
  533. y : label, position or list of label, positions, default None
  534. Allows plotting of one column versus another. Only used if data is a
  535. DataFrame.
  536. kind : str
  537. The kind of plot to produce:
  538. - 'line' : line plot (default)
  539. - 'bar' : vertical bar plot
  540. - 'barh' : horizontal bar plot
  541. - 'hist' : histogram
  542. - 'box' : boxplot
  543. - 'kde' : Kernel Density Estimation plot
  544. - 'density' : same as 'kde'
  545. - 'area' : area plot
  546. - 'pie' : pie plot
  547. - 'scatter' : scatter plot (DataFrame only)
  548. - 'hexbin' : hexbin plot (DataFrame only)
  549. ax : matplotlib axes object, default None
  550. An axes of the current figure.
  551. subplots : bool or sequence of iterables, default False
  552. Whether to group columns into subplots:
  553. - ``False`` : No subplots will be used
  554. - ``True`` : Make separate subplots for each column.
  555. - sequence of iterables of column labels: Create a subplot for each
  556. group of columns. For example `[('a', 'c'), ('b', 'd')]` will
  557. create 2 subplots: one with columns 'a' and 'c', and one
  558. with columns 'b' and 'd'. Remaining columns that aren't specified
  559. will be plotted in additional subplots (one per column).
  560. .. versionadded:: 1.5.0
  561. sharex : bool, default True if ax is None else False
  562. In case ``subplots=True``, share x axis and set some x axis labels
  563. to invisible; defaults to True if ax is None otherwise False if
  564. an ax is passed in; Be aware, that passing in both an ax and
  565. ``sharex=True`` will alter all x axis labels for all axis in a figure.
  566. sharey : bool, default False
  567. In case ``subplots=True``, share y axis and set some y axis labels to invisible.
  568. layout : tuple, optional
  569. (rows, columns) for the layout of subplots.
  570. figsize : a tuple (width, height) in inches
  571. Size of a figure object.
  572. use_index : bool, default True
  573. Use index as ticks for x axis.
  574. title : str or list
  575. Title to use for the plot. If a string is passed, print the string
  576. at the top of the figure. If a list is passed and `subplots` is
  577. True, print each item in the list above the corresponding subplot.
  578. grid : bool, default None (matlab style default)
  579. Axis grid lines.
  580. legend : bool or {'reverse'}
  581. Place legend on axis subplots.
  582. style : list or dict
  583. The matplotlib line style per column.
  584. logx : bool or 'sym', default False
  585. Use log scaling or symlog scaling on x axis.
  586. logy : bool or 'sym' default False
  587. Use log scaling or symlog scaling on y axis.
  588. loglog : bool or 'sym', default False
  589. Use log scaling or symlog scaling on both x and y axes.
  590. xticks : sequence
  591. Values to use for the xticks.
  592. yticks : sequence
  593. Values to use for the yticks.
  594. xlim : 2-tuple/list
  595. Set the x limits of the current axes.
  596. ylim : 2-tuple/list
  597. Set the y limits of the current axes.
  598. xlabel : label, optional
  599. Name to use for the xlabel on x-axis. Default uses index name as xlabel, or the
  600. x-column name for planar plots.
  601. .. versionadded:: 1.1.0
  602. .. versionchanged:: 1.2.0
  603. Now applicable to planar plots (`scatter`, `hexbin`).
  604. .. versionchanged:: 2.0.0
  605. Now applicable to histograms.
  606. ylabel : label, optional
  607. Name to use for the ylabel on y-axis. Default will show no ylabel, or the
  608. y-column name for planar plots.
  609. .. versionadded:: 1.1.0
  610. .. versionchanged:: 1.2.0
  611. Now applicable to planar plots (`scatter`, `hexbin`).
  612. .. versionchanged:: 2.0.0
  613. Now applicable to histograms.
  614. rot : float, default None
  615. Rotation for ticks (xticks for vertical, yticks for horizontal
  616. plots).
  617. fontsize : float, default None
  618. Font size for xticks and yticks.
  619. colormap : str or matplotlib colormap object, default None
  620. Colormap to select colors from. If string, load colormap with that
  621. name from matplotlib.
  622. colorbar : bool, optional
  623. If True, plot colorbar (only relevant for 'scatter' and 'hexbin'
  624. plots).
  625. position : float
  626. Specify relative alignments for bar plot layout.
  627. From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
  628. (center).
  629. table : bool, Series or DataFrame, default False
  630. If True, draw a table using the data in the DataFrame and the data
  631. will be transposed to meet matplotlib's default layout.
  632. If a Series or DataFrame is passed, use passed data to draw a
  633. table.
  634. yerr : DataFrame, Series, array-like, dict and str
  635. See :ref:`Plotting with Error Bars <visualization.errorbars>` for
  636. detail.
  637. xerr : DataFrame, Series, array-like, dict and str
  638. Equivalent to yerr.
  639. stacked : bool, default False in line and bar plots, and True in area plot
  640. If True, create stacked plot.
  641. secondary_y : bool or sequence, default False
  642. Whether to plot on the secondary y-axis if a list/tuple, which
  643. columns to plot on secondary y-axis.
  644. mark_right : bool, default True
  645. When using a secondary_y axis, automatically mark the column
  646. labels with "(right)" in the legend.
  647. include_bool : bool, default is False
  648. If True, boolean values can be plotted.
  649. backend : str, default None
  650. Backend to use instead of the backend specified in the option
  651. ``plotting.backend``. For instance, 'matplotlib'. Alternatively, to
  652. specify the ``plotting.backend`` for the whole session, set
  653. ``pd.options.plotting.backend``.
  654. **kwargs
  655. Options to pass to matplotlib plotting method.
  656. Returns
  657. -------
  658. :class:`matplotlib.axes.Axes` or numpy.ndarray of them
  659. If the backend is not the default matplotlib one, the return value
  660. will be the object returned by the backend.
  661. Notes
  662. -----
  663. - See matplotlib documentation online for more on this subject
  664. - If `kind` = 'bar' or 'barh', you can specify relative alignments
  665. for bar plot layout by `position` keyword.
  666. From 0 (left/bottom-end) to 1 (right/top-end). Default is 0.5
  667. (center)
  668. """
  # Kinds accepted for both Series and DataFrame data.
  _common_kinds = (
      "line",
      "bar",
      "barh",
      "kde",
      "density",
      "area",
      "hist",
      "box",
  )
  # Kinds drawn from a single column: when ``y`` is given, ``__call__``
  # reduces a DataFrame to that column before plotting.
  _series_kinds = ("pie",)
  # Kinds that ``__call__`` rejects for anything but a DataFrame.
  _dataframe_kinds = ("scatter", "hexbin")
  # Alternative spellings mapped onto the canonical kind name.
  _kind_aliases = dict(density="kde")
  # Every kind accepted on the matplotlib path of ``__call__``.
  _all_kinds = (*_common_kinds, *_series_kinds, *_dataframe_kinds)
  def __init__(self, data) -> None:
      """
      Remember the object this accessor plots.

      Parameters
      ----------
      data : Series or DataFrame
          Object the accessor is attached to. No validation happens here;
          ``_get_call_args`` raises ``TypeError`` later if it is neither a
          Series nor a DataFrame.
      """
      self._parent = data
  676. @staticmethod
  677. def _get_call_args(backend_name, data, args, kwargs):
  678. """
  679. This function makes calls to this accessor `__call__` method compatible
  680. with the previous `SeriesPlotMethods.__call__` and
  681. `DataFramePlotMethods.__call__`. Those had slightly different
  682. signatures, since `DataFramePlotMethods` accepted `x` and `y`
  683. parameters.
  684. """
  685. if isinstance(data, ABCSeries):
  686. arg_def = [
  687. ("kind", "line"),
  688. ("ax", None),
  689. ("figsize", None),
  690. ("use_index", True),
  691. ("title", None),
  692. ("grid", None),
  693. ("legend", False),
  694. ("style", None),
  695. ("logx", False),
  696. ("logy", False),
  697. ("loglog", False),
  698. ("xticks", None),
  699. ("yticks", None),
  700. ("xlim", None),
  701. ("ylim", None),
  702. ("rot", None),
  703. ("fontsize", None),
  704. ("colormap", None),
  705. ("table", False),
  706. ("yerr", None),
  707. ("xerr", None),
  708. ("label", None),
  709. ("secondary_y", False),
  710. ("xlabel", None),
  711. ("ylabel", None),
  712. ]
  713. elif isinstance(data, ABCDataFrame):
  714. arg_def = [
  715. ("x", None),
  716. ("y", None),
  717. ("kind", "line"),
  718. ("ax", None),
  719. ("subplots", False),
  720. ("sharex", None),
  721. ("sharey", False),
  722. ("layout", None),
  723. ("figsize", None),
  724. ("use_index", True),
  725. ("title", None),
  726. ("grid", None),
  727. ("legend", True),
  728. ("style", None),
  729. ("logx", False),
  730. ("logy", False),
  731. ("loglog", False),
  732. ("xticks", None),
  733. ("yticks", None),
  734. ("xlim", None),
  735. ("ylim", None),
  736. ("rot", None),
  737. ("fontsize", None),
  738. ("colormap", None),
  739. ("table", False),
  740. ("yerr", None),
  741. ("xerr", None),
  742. ("secondary_y", False),
  743. ("xlabel", None),
  744. ("ylabel", None),
  745. ]
  746. else:
  747. raise TypeError(
  748. f"Called plot accessor for type {type(data).__name__}, "
  749. "expected Series or DataFrame"
  750. )
  751. if args and isinstance(data, ABCSeries):
  752. positional_args = str(args)[1:-1]
  753. keyword_args = ", ".join(
  754. [f"{name}={repr(value)}" for (name, _), value in zip(arg_def, args)]
  755. )
  756. msg = (
  757. "`Series.plot()` should not be called with positional "
  758. "arguments, only keyword arguments. The order of "
  759. "positional arguments will change in the future. "
  760. f"Use `Series.plot({keyword_args})` instead of "
  761. f"`Series.plot({positional_args})`."
  762. )
  763. raise TypeError(msg)
  764. pos_args = {name: value for (name, _), value in zip(arg_def, args)}
  765. if backend_name == "pandas.plotting._matplotlib":
  766. kwargs = dict(arg_def, **pos_args, **kwargs)
  767. else:
  768. kwargs = dict(pos_args, **kwargs)
  769. x = kwargs.pop("x", None)
  770. y = kwargs.pop("y", None)
  771. kind = kwargs.pop("kind", "line")
  772. return x, y, kind, kwargs
  def __call__(self, *args, **kwargs):
      # The docstring of this method is replaced by the class docstring right
      # after the definition, so the explanation is kept in comments.
      #
      # Outline: resolve the backend, normalise the arguments, hand over
      # untouched to any non-matplotlib backend, and otherwise reshape the
      # data according to ``kind``, ``x`` and ``y`` before plotting.
      plot_backend = _get_plot_backend(kwargs.pop("backend", None))

      x, y, kind, kwargs = self._get_call_args(
          plot_backend.__name__, self._parent, args, kwargs
      )

      # Translate alias names (e.g. "density") into the canonical kind.
      kind = self._kind_aliases.get(kind, kind)

      # when using another backend, get out of the way
      if plot_backend.__name__ != "pandas.plotting._matplotlib":
          return plot_backend.plot(self._parent, x=x, y=y, kind=kind, **kwargs)

      if kind not in self._all_kinds:
          raise ValueError(f"{kind} is not a valid plot kind")

      # The original data structure may be transformed before it is passed
      # to the backend. For a DataFrame it is common to set the index from
      # the `x` parameter and to return a Series holding the `y` values.
      # Work on a copy so the caller's object is never modified.
      data = self._parent.copy()

      if isinstance(data, ABCSeries):
          kwargs["reuse_plot"] = True

      if kind in self._dataframe_kinds:
          # scatter / hexbin: x and y are forwarded as-is to the backend.
          if isinstance(data, ABCDataFrame):
              return plot_backend.plot(data, x=x, y=y, kind=kind, **kwargs)
          else:
              raise ValueError(f"plot kind {kind} can only be used for data frames")
      elif kind in self._series_kinds:
          if isinstance(data, ABCDataFrame):
              if y is None and kwargs.get("subplots") is False:
                  raise ValueError(
                      f"{kind} requires either y column or 'subplots=True'"
                  )
              if y is not None:
                  # An integer y is a position unless the column labels
                  # themselves are integers.
                  if is_integer(y) and not data.columns._holds_integer():
                      y = data.columns[y]
                  # converted to series actually. copy to not modify
                  data = data[y].copy()
                  data.index.name = y
      elif isinstance(data, ABCDataFrame):
          # Keep the original columns: positions given for x and y refer to
          # them even after `x` has been moved into the index below.
          data_cols = data.columns
          if x is not None:
              if is_integer(x) and not data.columns._holds_integer():
                  x = data_cols[x]
              elif not isinstance(data[x], ABCSeries):
                  raise ValueError("x must be a label or position")
              data = data.set_index(x)
          if y is not None:
              # check if we have y as int or list of ints
              int_ylist = is_list_like(y) and all(is_integer(c) for c in y)
              int_y_arg = is_integer(y) or int_ylist
              if int_y_arg and not data.columns._holds_integer():
                  y = data_cols[y]

              # False means "no label supplied".
              label_kw = kwargs["label"] if "label" in kwargs else False
              for kw in ["xerr", "yerr"]:
                  if kw in kwargs and (
                      isinstance(kwargs[kw], str) or is_integer(kwargs[kw])
                  ):
                      # Best effort: swap a column reference for the column
                      # itself; if the lookup fails the value is left alone.
                      try:
                          kwargs[kw] = data[kwargs[kw]]
                      except (IndexError, KeyError, TypeError):
                          pass

              # don't overwrite
              data = data[y].copy()

              if isinstance(data, ABCSeries):
                  label_name = label_kw or y
                  data.name = label_name
              else:
                  # Several columns selected: a label must be list-like with
                  # one entry per column.
                  match = is_list_like(label_kw) and len(label_kw) == len(y)
                  if label_kw and not match:
                      raise ValueError(
                          "label should be list-like and same length as y"
                      )
                  label_name = label_kw or data.columns
                  data.columns = label_name

      return plot_backend.plot(data, kind=kind, **kwargs)

  # Reuse the enclosing class docstring as the docstring of ``__call__``.
  __call__.__doc__ = __doc__
  # Decorators run bottom-up. Presumably ``_bar_or_line_doc`` (defined
  # elsewhere in this file) is appended first, ``kind`` is substituted into
  # it, and the See Also / Examples text below is appended last - confirm
  # against ``pandas.util._decorators``.
  @Appender(
      """
      See Also
      --------
      matplotlib.pyplot.plot : Plot y versus x as lines and/or markers.

      Examples
      --------

      .. plot::
          :context: close-figs

          >>> s = pd.Series([1, 3, 2])
          >>> lines = s.plot.line()

      .. plot::
          :context: close-figs

          The following example shows the populations for some animals
          over the years.

          >>> df = pd.DataFrame({
          ...    'pig': [20, 18, 489, 675, 1776],
          ...    'horse': [4, 25, 281, 600, 1900]
          ...    }, index=[1990, 1997, 2003, 2009, 2014])
          >>> lines = df.plot.line()

      .. plot::
         :context: close-figs

         An example with subplots, so an array of axes is returned.

         >>> axes = df.plot.line(subplots=True)
         >>> type(axes)
         <class 'numpy.ndarray'>

      .. plot::
         :context: close-figs

         Let's repeat the same example, but specifying colors for
         each column (in this case, for each animal).

         >>> axes = df.plot.line(
         ...     subplots=True, color={"pig": "pink", "horse": "#742802"}
         ... )

      .. plot::
          :context: close-figs

          The following example shows the relationship between both
          populations.

          >>> lines = df.plot.line(x='pig', y='horse')
      """
  )
  @Substitution(kind="line")
  @Appender(_bar_or_line_doc)
  def line(self, x=None, y=None, **kwargs) -> PlotAccessor:
      """
      Plot Series or DataFrame as lines.

      This function is useful to plot lines using DataFrame's values
      as coordinates.
      """
      # NOTE(review): the annotation says PlotAccessor, but the value
      # returned is whatever ``__call__`` gets back from the backend.
      return self(kind="line", x=x, y=y, **kwargs)
  # Decorators run bottom-up. Presumably ``_bar_or_line_doc`` (defined
  # elsewhere in this file) is appended first, ``kind`` is substituted into
  # it, and the See Also / Examples text below is appended last - confirm
  # against ``pandas.util._decorators``.
  @Appender(
      """
      See Also
      --------
      DataFrame.plot.barh : Horizontal bar plot.
      DataFrame.plot : Make plots of a DataFrame.
      matplotlib.pyplot.bar : Make a bar plot with matplotlib.

      Examples
      --------
      Basic plot.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'lab':['A', 'B', 'C'], 'val':[10, 30, 20]})
          >>> ax = df.plot.bar(x='lab', y='val', rot=0)

      Plot a whole dataframe to a bar plot. Each column is assigned a
      distinct color, and each row is nested in a group along the
      horizontal axis.

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.bar(rot=0)

      Plot stacked bar charts for the DataFrame

      .. plot::
          :context: close-figs

          >>> ax = df.plot.bar(stacked=True)

      Instead of nesting, the figure can be split by column with
      ``subplots=True``. In this case, a :class:`numpy.ndarray` of
      :class:`matplotlib.axes.Axes` are returned.

      .. plot::
          :context: close-figs

          >>> axes = df.plot.bar(rot=0, subplots=True)
          >>> axes[1].legend(loc=2)  # doctest: +SKIP

      If you don't like the default colours, you can specify how you'd
      like each column to be colored.

      .. plot::
          :context: close-figs

          >>> axes = df.plot.bar(
          ...     rot=0, subplots=True, color={"speed": "red", "lifespan": "green"}
          ... )
          >>> axes[1].legend(loc=2)  # doctest: +SKIP

      Plot a single column.

      .. plot::
          :context: close-figs

          >>> ax = df.plot.bar(y='speed', rot=0)

      Plot only selected categories for the DataFrame.

      .. plot::
          :context: close-figs

          >>> ax = df.plot.bar(x='lifespan', rot=0)
      """
  )
  @Substitution(kind="bar")
  @Appender(_bar_or_line_doc)
  def bar(  # pylint: disable=disallowed-name
      self, x=None, y=None, **kwargs
  ) -> PlotAccessor:
      """
      Vertical bar plot.

      A bar plot is a plot that presents categorical data with
      rectangular bars with lengths proportional to the values that they
      represent. A bar plot shows comparisons among discrete categories. One
      axis of the plot shows the specific categories being compared, and the
      other axis represents a measured value.
      """
      # NOTE(review): the annotation says PlotAccessor, but the value
      # returned is whatever ``__call__`` gets back from the backend.
      return self(kind="bar", x=x, y=y, **kwargs)
  # Decorators run bottom-up. Presumably ``_bar_or_line_doc`` (defined
  # elsewhere in this file) is appended first, ``kind`` is substituted into
  # it, and the See Also / Examples text below is appended last - confirm
  # against ``pandas.util._decorators``.
  @Appender(
      """
      See Also
      --------
      DataFrame.plot.bar: Vertical bar plot.
      DataFrame.plot : Make plots of DataFrame using matplotlib.
      matplotlib.axes.Axes.bar : Plot a vertical bar plot using matplotlib.

      Examples
      --------
      Basic example

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'lab': ['A', 'B', 'C'], 'val': [10, 30, 20]})
          >>> ax = df.plot.barh(x='lab', y='val')

      Plot a whole DataFrame to a horizontal bar plot

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh()

      Plot stacked barh charts for the DataFrame

      .. plot::
          :context: close-figs

          >>> ax = df.plot.barh(stacked=True)

      We can specify colors for each column

      .. plot::
          :context: close-figs

          >>> ax = df.plot.barh(color={"speed": "red", "lifespan": "green"})

      Plot a column of the DataFrame to a horizontal bar plot

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh(y='speed')

      Plot DataFrame versus the desired column

      .. plot::
          :context: close-figs

          >>> speed = [0.1, 17.5, 40, 48, 52, 69, 88]
          >>> lifespan = [2, 8, 70, 1.5, 25, 12, 28]
          >>> index = ['snail', 'pig', 'elephant',
          ...          'rabbit', 'giraffe', 'coyote', 'horse']
          >>> df = pd.DataFrame({'speed': speed,
          ...                    'lifespan': lifespan}, index=index)
          >>> ax = df.plot.barh(x='lifespan')
      """
  )
  # ``kind="bar"`` (not "barh") is what gets substituted into the shared
  # text; the reason is not visible from this block - TODO confirm it is
  # intentional.
  @Substitution(kind="bar")
  @Appender(_bar_or_line_doc)
  def barh(self, x=None, y=None, **kwargs) -> PlotAccessor:
      """
      Make a horizontal bar plot.

      A horizontal bar plot is a plot that presents quantitative data with
      rectangular bars with lengths proportional to the values that they
      represent. A bar plot shows comparisons among discrete categories. One
      axis of the plot shows the specific categories being compared, and the
      other axis represents a measured value.
      """
      # NOTE(review): the annotation says PlotAccessor, but the value
      # returned is whatever ``__call__`` gets back from the backend.
      return self(kind="barh", x=x, y=y, **kwargs)
  def box(self, by=None, **kwargs) -> PlotAccessor:
      r"""
      Make a box plot of the DataFrame columns.

      A box plot is a method for graphically depicting groups of numerical
      data through their quartiles.
      The box extends from the Q1 to Q3 quartile values of the data,
      with a line at the median (Q2). The whiskers extend from the edges
      of the box to show the range of the data. The position of the whiskers
      is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the
      box. Outlier points are those past the end of the whiskers.

      For further details see Wikipedia's
      entry for `boxplot <https://en.wikipedia.org/wiki/Box_plot>`__.

      A consideration when using this chart is that the box and the whiskers
      can overlap, which is very common when plotting small sets of data.

      Parameters
      ----------
      by : str or sequence, optional
          Column in the DataFrame to group by.

          .. versionchanged:: 1.4.0

             Previously, `by` was silently ignored and no groupings were
             made.

      **kwargs
          Additional keywords are documented in
          :meth:`DataFrame.plot`.

      Returns
      -------
      :class:`matplotlib.axes.Axes` or numpy.ndarray of them

      See Also
      --------
      DataFrame.boxplot : Another method to draw a box plot.
      Series.plot.box : Draw a box plot from a Series object.
      matplotlib.pyplot.boxplot : Draw a box plot in matplotlib.

      Examples
      --------
      Draw a box plot from a DataFrame with four columns of randomly
      generated data.

      .. plot::
          :context: close-figs

          >>> data = np.random.randn(25, 4)
          >>> df = pd.DataFrame(data, columns=list('ABCD'))
          >>> ax = df.plot.box()

      You can also generate groupings if you specify the `by` parameter (which
      can take a column name, or a list or tuple of column names):

      .. versionchanged:: 1.4.0

      .. plot::
          :context: close-figs

          >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
          >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
          >>> ax = df.plot.box(column="age", by="gender", figsize=(10, 8))
      """
      # NOTE(review): the annotation says PlotAccessor, while the docstring
      # documents Axes / ndarray as the return value - confirm.
      return self(kind="box", by=by, **kwargs)
  def hist(self, by=None, bins: int = 10, **kwargs) -> PlotAccessor:
      """
      Draw one histogram of the DataFrame's columns.

      A histogram is a representation of the distribution of data.
      This function groups the values of all given Series in the DataFrame
      into bins and draws all bins in one :class:`matplotlib.axes.Axes`.
      This is useful when the DataFrame's Series are in a similar scale.

      Parameters
      ----------
      by : str or sequence, optional
          Column in the DataFrame to group by.

          .. versionchanged:: 1.4.0

             Previously, `by` was silently ignored and no groupings were
             made.

      bins : int, default 10
          Number of histogram bins to be used.
      **kwargs
          Additional keyword arguments are documented in
          :meth:`DataFrame.plot`.

      Returns
      -------
      :class:`matplotlib.AxesSubplot`
          Return a histogram plot.

      See Also
      --------
      DataFrame.hist : Draw histograms per DataFrame's Series.
      Series.hist : Draw a histogram with Series' data.

      Examples
      --------
      When we roll a die 6000 times, we expect to get each value around 1000
      times. But when we roll two dice and sum the result, the distribution
      is going to be quite different. A histogram illustrates those
      distributions.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame(
          ...     np.random.randint(1, 7, 6000),
          ...     columns = ['one'])
          >>> df['two'] = df['one'] + np.random.randint(1, 7, 6000)
          >>> ax = df.plot.hist(bins=12, alpha=0.5)

      A grouped histogram can be generated by providing the parameter `by` (which
      can be a column name, or a list of column names):

      .. plot::
          :context: close-figs

          >>> age_list = [8, 10, 12, 14, 72, 74, 76, 78, 20, 25, 30, 35, 60, 85]
          >>> df = pd.DataFrame({"gender": list("MMMMMMMMFFFFFF"), "age": age_list})
          >>> ax = df.plot.hist(column=["age"], by="gender", figsize=(10, 8))
      """
      # NOTE(review): the annotation says PlotAccessor, while the docstring
      # documents a matplotlib axes object as the return value - confirm.
      return self(kind="hist", by=by, bins=bins, **kwargs)
  def kde(self, bw_method=None, ind=None, **kwargs) -> PlotAccessor:
      """
      Generate Kernel Density Estimate plot using Gaussian kernels.

      In statistics, `kernel density estimation`_ (KDE) is a non-parametric
      way to estimate the probability density function (PDF) of a random
      variable. This function uses Gaussian kernels and includes automatic
      bandwidth determination.

      .. _kernel density estimation:
          https://en.wikipedia.org/wiki/Kernel_density_estimation

      Parameters
      ----------
      bw_method : str, scalar or callable, optional
          The method used to calculate the estimator bandwidth. This can be
          'scott', 'silverman', a scalar constant or a callable.
          If None (default), 'scott' is used.
          See :class:`scipy.stats.gaussian_kde` for more information.
      ind : NumPy array or int, optional
          Evaluation points for the estimated PDF. If None (default),
          1000 equally spaced points are used. If `ind` is a NumPy array, the
          KDE is evaluated at the points passed. If `ind` is an integer,
          `ind` number of equally spaced points are used.
      **kwargs
          Additional keyword arguments are documented in
          :meth:`DataFrame.plot`.

      Returns
      -------
      matplotlib.axes.Axes or numpy.ndarray of them

      See Also
      --------
      scipy.stats.gaussian_kde : Representation of a kernel-density
          estimate using Gaussian kernels. This is the function used
          internally to estimate the PDF.

      Examples
      --------
      Given a Series of points randomly sampled from an unknown
      distribution, estimate its PDF using KDE with automatic
      bandwidth determination and plot the results, evaluating them at
      1000 equally spaced points (default):

      .. plot::
          :context: close-figs

          >>> s = pd.Series([1, 2, 2.5, 3, 3.5, 4, 5])
          >>> ax = s.plot.kde()

      A scalar bandwidth can be specified. Using a small bandwidth value can
      lead to over-fitting, while using a large bandwidth value may result
      in under-fitting:

      .. plot::
          :context: close-figs

          >>> ax = s.plot.kde(bw_method=0.3)

      .. plot::
          :context: close-figs

          >>> ax = s.plot.kde(bw_method=3)

      Finally, the `ind` parameter determines the evaluation points for the
      plot of the estimated PDF:

      .. plot::
          :context: close-figs

          >>> ax = s.plot.kde(ind=[1, 2, 3, 4, 5])

      For DataFrame, it works in the same way:

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({
          ...     'x': [1, 2, 2.5, 3, 3.5, 4, 5],
          ...     'y': [4, 4, 4.5, 5, 5.5, 6, 6],
          ... })
          >>> ax = df.plot.kde()

      A scalar bandwidth can be specified. Using a small bandwidth value can
      lead to over-fitting, while using a large bandwidth value may result
      in under-fitting:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.kde(bw_method=0.3)

      .. plot::
          :context: close-figs

          >>> ax = df.plot.kde(bw_method=3)

      Finally, the `ind` parameter determines the evaluation points for the
      plot of the estimated PDF:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.kde(ind=[1, 2, 3, 4, 5, 6])
      """
      # NOTE(review): the annotation says PlotAccessor, while the docstring
      # documents Axes / ndarray as the return value - confirm.
      return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)

  # ``density`` is the same function object as ``kde``, so
  # ``obj.plot.density(...)`` behaves exactly like ``obj.plot.kde(...)``.
  density = kde
  def area(self, x=None, y=None, stacked: bool = True, **kwargs) -> PlotAccessor:
      """
      Draw a stacked area plot.

      An area plot displays quantitative data visually.
      This function wraps the matplotlib area function.

      Parameters
      ----------
      x : label or position, optional
          Coordinates for the X axis. By default uses the index.
      y : label or position, optional
          Column to plot. By default uses all columns.
      stacked : bool, default True
          Area plots are stacked by default. Set to False to create an
          unstacked plot.
      **kwargs
          Additional keyword arguments are documented in
          :meth:`DataFrame.plot`.

      Returns
      -------
      matplotlib.axes.Axes or numpy.ndarray
          Area plot, or array of area plots if subplots is True.

      See Also
      --------
      DataFrame.plot : Make plots of DataFrame using matplotlib / pylab.

      Examples
      --------
      Draw an area plot based on basic business metrics:

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({
          ...     'sales': [3, 2, 3, 9, 10, 6],
          ...     'signups': [5, 5, 6, 12, 14, 13],
          ...     'visits': [20, 42, 28, 62, 81, 50],
          ... }, index=pd.date_range(start='2018/01/01', end='2018/07/01',
          ...                        freq='M'))
          >>> ax = df.plot.area()

      Area plots are stacked by default. To produce an unstacked plot,
      pass ``stacked=False``:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.area(stacked=False)

      Draw an area plot for a single column:

      .. plot::
          :context: close-figs

          >>> ax = df.plot.area(y='sales')

      Draw with a different `x`:

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({
          ...     'sales': [3, 2, 3],
          ...     'visits': [20, 42, 28],
          ...     'day': [1, 2, 3],
          ... })
          >>> ax = df.plot.area(x='day')
      """
      # NOTE(review): the annotation says PlotAccessor, while the docstring
      # documents Axes / ndarray as the return value - confirm.
      return self(kind="area", x=x, y=y, stacked=stacked, **kwargs)
  def pie(self, **kwargs) -> PlotAccessor:
      """
      Generate a pie plot.

      A pie plot shows the numerical data of one column as proportions of a
      whole. This function wraps :meth:`matplotlib.pyplot.pie` for the
      specified column. When no column is referenced and ``subplots=True``
      is passed, every numerical column gets a pie plot of its own.

      Parameters
      ----------
      y : int or label, optional
          Label or position of the column to plot.
          If not provided, ``subplots=True`` argument must be passed.
      **kwargs
          Keyword arguments to pass on to :meth:`DataFrame.plot`.

      Returns
      -------
      matplotlib.axes.Axes or np.ndarray of them
          A NumPy array is returned when `subplots` is True.

      Raises
      ------
      ValueError
          If the accessor belongs to a DataFrame and neither ``y`` nor a
          truthy ``subplots`` is given.

      See Also
      --------
      Series.plot.pie : Generate a pie plot for a Series.
      DataFrame.plot : Make plots of a DataFrame.

      Examples
      --------
      The DataFrame below holds the mass and radius of a few planets.
      Passing the 'mass' column to the pie function draws a pie plot of it.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame({'mass': [0.330, 4.87 , 5.97],
          ...                    'radius': [2439.7, 6051.8, 6378.1]},
          ...                   index=['Mercury', 'Venus', 'Earth'])
          >>> plot = df.plot.pie(y='mass', figsize=(5, 5))

      .. plot::
          :context: close-figs

          >>> plot = df.plot.pie(subplots=True, figsize=(11, 6))
      """
      if isinstance(self._parent, ABCDataFrame):
          # A frame needs to be told which column to draw, unless each
          # column is to get its own subplot.
          column_missing = kwargs.get("y") is None
          if column_missing and not kwargs.get("subplots", False):
              raise ValueError("pie requires either y column or 'subplots=True'")
      return self(kind="pie", **kwargs)
  def scatter(self, x, y, s=None, c=None, **kwargs) -> PlotAccessor:
      """
      Create a scatter plot with varying marker point size and color.

      The coordinates of each point are defined by two dataframe columns and
      filled circles are used to represent each point. This kind of plot is
      useful to see complex correlations between two variables. Points could
      be for instance natural 2D coordinates like longitude and latitude in
      a map or, in general, any pair of metrics that can be plotted against
      each other.

      Parameters
      ----------
      x : int or str
          The column name or column position to be used as horizontal
          coordinates for each point.
      y : int or str
          The column name or column position to be used as vertical
          coordinates for each point.
      s : str, scalar or array-like, optional
          The size of each point. Possible values are:

          - A string with the name of the column to be used for marker's size.

          - A single scalar so all points have the same size.

          - A sequence of scalars, which will be used for each point's size
            cyclically. For instance, when passing [2,14] all points size
            will be either 2 or 14, alternatively.

            .. versionchanged:: 1.1.0

      c : str, int or array-like, optional
          The color of each point. Possible values are:

          - A single color string referred to by name, RGB or RGBA code,
            for instance 'red' or '#a98d19'.

          - A sequence of color strings referred to by name, RGB or RGBA
            code, which will be used for each point's color cyclically. For
            instance ['green','yellow'] all points will be filled in green or
            yellow, alternatively.

          - A column name or position whose values will be used to color the
            marker points according to a colormap.

      **kwargs
          Keyword arguments to pass on to :meth:`DataFrame.plot`.

      Returns
      -------
      :class:`matplotlib.axes.Axes` or numpy.ndarray of them

      See Also
      --------
      matplotlib.pyplot.scatter : Scatter plot using multiple input data
          formats.

      Examples
      --------
      Let's see how to draw a scatter plot using coordinates from the values
      in a DataFrame's columns.

      .. plot::
          :context: close-figs

          >>> df = pd.DataFrame([[5.1, 3.5, 0], [4.9, 3.0, 0], [7.0, 3.2, 1],
          ...                    [6.4, 3.2, 1], [5.9, 3.0, 2]],
          ...                   columns=['length', 'width', 'species'])
          >>> ax1 = df.plot.scatter(x='length',
          ...                       y='width',
          ...                       c='DarkBlue')

      And now with the color determined by a column as well.

      .. plot::
          :context: close-figs

          >>> ax2 = df.plot.scatter(x='length',
          ...                       y='width',
          ...                       c='species',
          ...                       colormap='viridis')
      """
      # ``s`` and ``c`` are always forwarded, even when left as None.
      # NOTE(review): the annotation says PlotAccessor, while the docstring
      # documents Axes / ndarray as the return value - confirm.
      return self(kind="scatter", x=x, y=y, s=s, c=c, **kwargs)
  def hexbin(
      self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwargs
  ) -> PlotAccessor:
      """
      Generate a hexagonal binning plot.

      Plot `x` against `y` on a grid of hexagonal bins. With `C` left as
      `None` (the default) the plot is a histogram counting the observations
      found at ``(x[i], y[i])``.

      When `C` is given, it supplies a value for each coordinate
      ``(x[i], y[i])``. The values falling into one bin are collected and
      then reduced with `reduce_C_function`, which defaults to NumPy's mean
      function (:meth:`numpy.mean`).
      (If `C` is specified, it must also be a 1-D sequence
      of the same length as `x` and `y`, or a column label.)

      Parameters
      ----------
      x : int or str
          The column label or position for x points.
      y : int or str
          The column label or position for y points.
      C : int or str, optional
          The column label or position for the value of `(x, y)` point.
      reduce_C_function : callable, default `np.mean`
          Function of one argument that reduces all the values in a bin to
          a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`).
      gridsize : int or tuple of (int, int), default 100
          The number of hexagons in the x-direction.
          The corresponding number of hexagons in the y-direction is
          chosen in a way that the hexagons are approximately regular.
          Alternatively, gridsize can be a tuple with two elements
          specifying the number of hexagons in the x-direction and the
          y-direction.
      **kwargs
          Additional keyword arguments are documented in
          :meth:`DataFrame.plot`.

      Returns
      -------
      matplotlib.AxesSubplot
          The matplotlib ``Axes`` on which the hexbin is plotted.

      See Also
      --------
      DataFrame.plot : Make plots of a DataFrame.
      matplotlib.pyplot.hexbin : Hexagonal binning plot using matplotlib,
          the matplotlib function that is used under the hood.

      Examples
      --------
      The following examples are generated with random data from
      a normal distribution.

      .. plot::
          :context: close-figs

          >>> n = 10000
          >>> df = pd.DataFrame({'x': np.random.randn(n),
          ...                    'y': np.random.randn(n)})
          >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)

      The next example uses `C` and `np.sum` as `reduce_C_function`.
      Note that `'observations'` values range from 1 to 5 but the resulting
      plot shows values up to more than 25. This is because of the
      `reduce_C_function`.

      .. plot::
          :context: close-figs

          >>> n = 500
          >>> df = pd.DataFrame({
          ...     'coord_x': np.random.uniform(-3, 3, size=n),
          ...     'coord_y': np.random.uniform(30, 50, size=n),
          ...     'observations': np.random.randint(1,5, size=n)
          ...     })
          >>> ax = df.plot.hexbin(x='coord_x',
          ...                     y='coord_y',
          ...                     C='observations',
          ...                     reduce_C_function=np.sum,
          ...                     gridsize=10,
          ...                     cmap="viridis")
      """
      # Forward the two optional settings only when the caller supplied
      # them; ``C`` is always forwarded.
      for option, value in (
          ("reduce_C_function", reduce_C_function),
          ("gridsize", gridsize),
      ):
          if value is not None:
              kwargs[option] = value

      return self(kind="hexbin", x=x, y=y, C=C, **kwargs)
# Cache of plotting backend modules that have already been loaded, keyed by
# the backend name string. `_get_plot_backend` consults this mapping first and
# stores newly loaded modules in it, so each backend is loaded at most once.
_backends: dict[str, types.ModuleType] = {}
  1454. def _load_backend(backend: str) -> types.ModuleType:
  1455. """
  1456. Load a pandas plotting backend.
  1457. Parameters
  1458. ----------
  1459. backend : str
  1460. The identifier for the backend. Either an entrypoint item registered
  1461. with importlib.metadata, "matplotlib", or a module name.
  1462. Returns
  1463. -------
  1464. types.ModuleType
  1465. The imported backend.
  1466. """
  1467. from importlib.metadata import entry_points
  1468. if backend == "matplotlib":
  1469. # Because matplotlib is an optional dependency and first-party backend,
  1470. # we need to attempt an import here to raise an ImportError if needed.
  1471. try:
  1472. module = importlib.import_module("pandas.plotting._matplotlib")
  1473. except ImportError:
  1474. raise ImportError(
  1475. "matplotlib is required for plotting when the "
  1476. 'default backend "matplotlib" is selected.'
  1477. ) from None
  1478. return module
  1479. found_backend = False
  1480. eps = entry_points()
  1481. key = "pandas_plotting_backends"
  1482. # entry_points lost dict API ~ PY 3.10
  1483. # https://github.com/python/importlib_metadata/issues/298
  1484. if hasattr(eps, "select"):
  1485. entry = eps.select(group=key) # pyright: ignore[reportGeneralTypeIssues]
  1486. else:
  1487. entry = eps.get(key, ())
  1488. for entry_point in entry:
  1489. found_backend = entry_point.name == backend
  1490. if found_backend:
  1491. module = entry_point.load()
  1492. break
  1493. if not found_backend:
  1494. # Fall back to unregistered, module name approach.
  1495. try:
  1496. module = importlib.import_module(backend)
  1497. found_backend = True
  1498. except ImportError:
  1499. # We re-raise later on.
  1500. pass
  1501. if found_backend:
  1502. if hasattr(module, "plot"):
  1503. # Validate that the interface is implemented when the option is set,
  1504. # rather than at plot time.
  1505. return module
  1506. raise ValueError(
  1507. f"Could not find plotting backend '{backend}'. Ensure that you've "
  1508. f"installed the package providing the '{backend}' entrypoint, or that "
  1509. "the package has a top-level `.plot` method."
  1510. )
  1511. def _get_plot_backend(backend: str | None = None):
  1512. """
  1513. Return the plotting backend to use (e.g. `pandas.plotting._matplotlib`).
  1514. The plotting system of pandas uses matplotlib by default, but the idea here
  1515. is that it can also work with other third-party backends. This function
  1516. returns the module which provides a top-level `.plot` method that will
  1517. actually do the plotting. The backend is specified from a string, which
  1518. either comes from the keyword argument `backend`, or, if not specified, from
  1519. the option `pandas.options.plotting.backend`. All the rest of the code in
  1520. this file uses the backend specified there for the plotting.
  1521. The backend is imported lazily, as matplotlib is a soft dependency, and
  1522. pandas can be used without it being installed.
  1523. Notes
  1524. -----
  1525. Modifies `_backends` with imported backend as a side effect.
  1526. """
  1527. backend_str: str = backend or get_option("plotting.backend")
  1528. if backend_str in _backends:
  1529. return _backends[backend_str]
  1530. module = _load_backend(backend_str)
  1531. _backends[backend_str] = module
  1532. return module