style_render.py 84 KB


  1. from __future__ import annotations
  2. from collections import defaultdict
  3. from functools import partial
  4. import re
  5. from typing import (
  6. Any,
  7. Callable,
  8. DefaultDict,
  9. Dict,
  10. List,
  11. Optional,
  12. Sequence,
  13. Tuple,
  14. TypedDict,
  15. Union,
  16. )
  17. from uuid import uuid4
  18. import numpy as np
  19. from pandas._config import get_option
  20. from pandas._libs import lib
  21. from pandas._typing import (
  22. Axis,
  23. Level,
  24. )
  25. from pandas.compat._optional import import_optional_dependency
  26. from pandas.core.dtypes.common import (
  27. is_complex,
  28. is_float,
  29. is_integer,
  30. )
  31. from pandas.core.dtypes.generic import ABCSeries
  32. from pandas import (
  33. DataFrame,
  34. Index,
  35. IndexSlice,
  36. MultiIndex,
  37. Series,
  38. isna,
  39. )
  40. from pandas.api.types import is_list_like
  41. import pandas.core.common as com
  42. jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.")
  43. from markupsafe import escape as escape_html # markupsafe is jinja2 dependency
  # A single formatter: either a string or a callable.
  BaseFormatter = Union[str, Callable]
  # A formatter, or a dict whose values are formatters (or None) keyed by any object.
  ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]]
  # One CSS declaration as a 2-tuple: a string and a string-or-float
  # (presumably ``(attribute, value)`` -- confirm against the CSS helpers).
  CSSPair = Tuple[str, Union[str, float]]
  # A list of CSS pairs.
  CSSList = List[CSSPair]
  # CSS properties given either as one string or as a list of CSS pairs.
  CSSProperties = Union[str, CSSList]
  class CSSDict(TypedDict):
      """Typed dict pairing a ``selector`` string with its ``props``."""

      # selector string (presumably a CSS selector -- confirm with callers)
      selector: str
      # properties for that selector: a single string or a list of CSS pairs
      props: CSSProperties
  # A list of ``CSSDict`` entries; the type of the ``table_styles`` argument.
  CSSStyles = List[CSSDict]
  # Accepted types for a ``subset`` argument (see ``StylerRenderer.format``).
  Subset = Union[slice, Sequence, Index]
  54. class StylerRenderer:
  55. """
  56. Base class to process rendering a Styler with a specified jinja2 template.
  57. """
  58. loader = jinja2.PackageLoader("pandas", "io/formats/templates")
  59. env = jinja2.Environment(loader=loader, trim_blocks=True)
  60. template_html = env.get_template("html.tpl")
  61. template_html_table = env.get_template("html_table.tpl")
  62. template_html_style = env.get_template("html_style.tpl")
  63. template_latex = env.get_template("latex.tpl")
  64. template_string = env.get_template("string.tpl")
  65. def __init__(
  66. self,
  67. data: DataFrame | Series,
  68. uuid: str | None = None,
  69. uuid_len: int = 5,
  70. table_styles: CSSStyles | None = None,
  71. table_attributes: str | None = None,
  72. caption: str | tuple | list | None = None,
  73. cell_ids: bool = True,
  74. precision: int | None = None,
  75. ) -> None:
  76. # validate ordered args
  77. if isinstance(data, Series):
  78. data = data.to_frame()
  79. if not isinstance(data, DataFrame):
  80. raise TypeError("``data`` must be a Series or DataFrame")
  81. self.data: DataFrame = data
  82. self.index: Index = data.index
  83. self.columns: Index = data.columns
  84. if not isinstance(uuid_len, int) or uuid_len < 0:
  85. raise TypeError("``uuid_len`` must be an integer in range [0, 32].")
  86. self.uuid = uuid or uuid4().hex[: min(32, uuid_len)]
  87. self.uuid_len = len(self.uuid)
  88. self.table_styles = table_styles
  89. self.table_attributes = table_attributes
  90. self.caption = caption
  91. self.cell_ids = cell_ids
  92. self.css = {
  93. "row_heading": "row_heading",
  94. "col_heading": "col_heading",
  95. "index_name": "index_name",
  96. "col": "col",
  97. "row": "row",
  98. "col_trim": "col_trim",
  99. "row_trim": "row_trim",
  100. "level": "level",
  101. "data": "data",
  102. "blank": "blank",
  103. "foot": "foot",
  104. }
  105. self.concatenated: list[StylerRenderer] = []
  106. # add rendering variables
  107. self.hide_index_names: bool = False
  108. self.hide_column_names: bool = False
  109. self.hide_index_: list = [False] * self.index.nlevels
  110. self.hide_columns_: list = [False] * self.columns.nlevels
  111. self.hidden_rows: Sequence[int] = [] # sequence for specific hidden rows/cols
  112. self.hidden_columns: Sequence[int] = []
  113. self.ctx: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
  114. self.ctx_index: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
  115. self.ctx_columns: DefaultDict[tuple[int, int], CSSList] = defaultdict(list)
  116. self.cell_context: DefaultDict[tuple[int, int], str] = defaultdict(str)
  117. self._todo: list[tuple[Callable, tuple, dict]] = []
  118. self.tooltips: Tooltips | None = None
  119. precision = (
  120. get_option("styler.format.precision") if precision is None else precision
  121. )
  122. self._display_funcs: DefaultDict[ # maps (row, col) -> format func
  123. tuple[int, int], Callable[[Any], str]
  124. ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
  125. self._display_funcs_index: DefaultDict[ # maps (row, level) -> format func
  126. tuple[int, int], Callable[[Any], str]
  127. ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
  128. self._display_funcs_columns: DefaultDict[ # maps (level, col) -> format func
  129. tuple[int, int], Callable[[Any], str]
  130. ] = defaultdict(lambda: partial(_default_formatter, precision=precision))
  131. def _render(
  132. self,
  133. sparse_index: bool,
  134. sparse_columns: bool,
  135. max_rows: int | None = None,
  136. max_cols: int | None = None,
  137. blank: str = "",
  138. ):
  139. """
  140. Computes and applies styles and then generates the general render dicts.
  141. Also extends the `ctx` and `ctx_index` attributes with those of concatenated
  142. stylers for use within `_translate_latex`
  143. """
  144. self._compute()
  145. dxs = []
  146. ctx_len = len(self.index)
  147. for i, concatenated in enumerate(self.concatenated):
  148. concatenated.hide_index_ = self.hide_index_
  149. concatenated.hidden_columns = self.hidden_columns
  150. foot = f"{self.css['foot']}{i}"
  151. concatenated.css = {
  152. **self.css,
  153. "data": f"{foot}_data",
  154. "row_heading": f"{foot}_row_heading",
  155. "row": f"{foot}_row",
  156. "foot": f"{foot}_foot",
  157. }
  158. dx = concatenated._render(
  159. sparse_index, sparse_columns, max_rows, max_cols, blank
  160. )
  161. dxs.append(dx)
  162. for (r, c), v in concatenated.ctx.items():
  163. self.ctx[(r + ctx_len, c)] = v
  164. for (r, c), v in concatenated.ctx_index.items():
  165. self.ctx_index[(r + ctx_len, c)] = v
  166. ctx_len += len(concatenated.index)
  167. d = self._translate(
  168. sparse_index, sparse_columns, max_rows, max_cols, blank, dxs
  169. )
  170. return d
  171. def _render_html(
  172. self,
  173. sparse_index: bool,
  174. sparse_columns: bool,
  175. max_rows: int | None = None,
  176. max_cols: int | None = None,
  177. **kwargs,
  178. ) -> str:
  179. """
  180. Renders the ``Styler`` including all applied styles to HTML.
  181. Generates a dict with necessary kwargs passed to jinja2 template.
  182. """
  183. d = self._render(sparse_index, sparse_columns, max_rows, max_cols, "&nbsp;")
  184. d.update(kwargs)
  185. return self.template_html.render(
  186. **d,
  187. html_table_tpl=self.template_html_table,
  188. html_style_tpl=self.template_html_style,
  189. )
  def _render_latex(
      self, sparse_index: bool, sparse_columns: bool, clines: str | None, **kwargs
  ) -> str:
      """
      Render a Styler with the LaTeX template.

      The render dict is built without row/column limits, post-processed by
      ``_translate_latex`` and updated with ``kwargs``. Before rendering, four
      parsing helpers are registered as globals of the LaTeX template.
      """
      render_ctx = self._render(sparse_index, sparse_columns, None, None)
      self._translate_latex(render_ctx, clines=clines)

      # expose the parsing helpers to the template under fixed names
      latex_parsers = (
          ("parse_wrap", _parse_latex_table_wrapping),
          ("parse_table", _parse_latex_table_styles),
          ("parse_cell", _parse_latex_cell_styles),
          ("parse_header", _parse_latex_header_span),
      )
      for global_name, parser in latex_parsers:
          self.template_latex.globals[global_name] = parser

      render_ctx.update(kwargs)
      return self.template_latex.render(**render_ctx)
  204. def _render_string(
  205. self,
  206. sparse_index: bool,
  207. sparse_columns: bool,
  208. max_rows: int | None = None,
  209. max_cols: int | None = None,
  210. **kwargs,
  211. ) -> str:
  212. """
  213. Render a Styler in string format
  214. """
  215. d = self._render(sparse_index, sparse_columns, max_rows, max_cols)
  216. d.update(kwargs)
  217. return self.template_string.render(**d)
  218. def _compute(self):
  219. """
  220. Execute the style functions built up in `self._todo`.
  221. Relies on the conventions that all style functions go through
  222. .apply or .applymap. The append styles to apply as tuples of
  223. (application method, *args, **kwargs)
  224. """
  225. self.ctx.clear()
  226. self.ctx_index.clear()
  227. self.ctx_columns.clear()
  228. r = self
  229. for func, args, kwargs in self._todo:
  230. r = func(self)(*args, **kwargs)
  231. return r
  def _translate(
      self,
      sparse_index: bool,
      sparse_cols: bool,
      max_rows: int | None = None,
      max_cols: int | None = None,
      blank: str = "&nbsp;",
      dxs: list[dict] | None = None,
  ):
      """
      Process Styler data and settings into a dict for template rendering.

      Parameters
      ----------
      sparse_index : bool
          Whether to sparsify the index or print all hierarchical index elements.
      sparse_cols : bool
          Whether to sparsify the columns or print all hierarchical column
          elements.
      max_rows, max_cols : int, optional
          Specific max rows and cols. A falsy value falls back to the
          ``styler.render.max_rows`` / ``styler.render.max_columns`` option.
          Both are then passed, with ``styler.render.max_elements``, through
          ``_get_trimming_maximums``.
      blank : str
          Stored as ``self.css["blank_value"]`` and used for blank cells.
      dxs : list[dict], optional
          The render dicts of the concatenated Stylers. Their ``body``,
          ``cellstyle`` and ``cellstyle_index`` entries are appended to this
          Styler's.

      Returns
      -------
      d : dict
          Keys: uuid, table_styles, caption, head, index_lengths, body,
          cellstyle, cellstyle_index, cellstyle_columns, table_attributes.
          When tooltips are set, the dict returned by the tooltips' own
          ``_translate`` is returned instead.
      """
      concatenated_dicts = [] if dxs is None else dxs
      self.css["blank_value"] = blank

      # construct render dict
      render_dict = {
          "uuid": self.uuid,
          "table_styles": format_table_styles(self.table_styles or []),
          "caption": self.caption,
      }

      max_elements = get_option("styler.render.max_elements")
      max_rows = max_rows or get_option("styler.render.max_rows")
      max_cols = max_cols or get_option("styler.render.max_columns")
      max_rows, max_cols = _get_trimming_maximums(
          len(self.data.index),
          len(self.data.columns),
          max_elements,
          max_rows,
          max_cols,
      )

      # must exist before the header is translated, which fills it
      self.cellstyle_map_columns: DefaultDict[
          tuple[CSSPair, ...], list[str]
      ] = defaultdict(list)
      render_dict["head"] = self._translate_header(sparse_cols, max_cols)

      # for sparsifying a MultiIndex and for use with latex clines
      idx_lengths = _get_level_lengths(
          self.index, sparse_index, max_rows, self.hidden_rows
      )
      render_dict["index_lengths"] = idx_lengths

      # must exist before the body is translated, which fills them
      self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
          list
      )
      self.cellstyle_map_index: DefaultDict[
          tuple[CSSPair, ...], list[str]
      ] = defaultdict(list)
      render_dict["body"] = self._translate_body(idx_lengths, max_rows, max_cols)

      # add the cell_ids styles maps to the render dictionary in right format
      style_maps = (
          ("cellstyle", self.cellstyle_map),
          ("cellstyle_index", self.cellstyle_map_index),
          ("cellstyle_columns", self.cellstyle_map_columns),
      )
      for key, style_map in style_maps:
          render_dict[key] = [
              {"props": list(props), "selectors": selectors}
              for props, selectors in style_map.items()
          ]

      for dx in concatenated_dicts:  # self.concatenated is not empty
          render_dict["body"].extend(dx["body"])  # type: ignore[union-attr]
          render_dict["cellstyle"].extend(  # type: ignore[union-attr]
              dx["cellstyle"]
          )
          render_dict["cellstyle_index"].extend(  # type: ignore[union-attr]
              dx["cellstyle_index"]
          )

      table_attr = self.table_attributes
      if not get_option("styler.html.mathjax"):
          # add the ``tex2jax_ignore`` class, merging with an existing class attr
          current_attr = table_attr or ""
          if 'class="' in current_attr:
              table_attr = current_attr.replace('class="', 'class="tex2jax_ignore ')
          else:
              table_attr = current_attr + ' class="tex2jax_ignore"'
      render_dict["table_attributes"] = table_attr

      if self.tooltips:
          render_dict = self.tooltips._translate(self, render_dict)

      return render_dict
  def _translate_header(self, sparsify_cols: bool, max_cols: int):
      """
      Build each <tr> within table <head> as a list

      Using the structure:
           +----------------------------+---------------+---------------------------+
           |  index_blanks ...          | column_name_0 |  column_headers (level_0) |
        1) |       ..                   |       ..      |             ..            |
           |  index_blanks ...          | column_name_n |  column_headers (level_n) |
           +----------------------------+---------------+---------------------------+
        2) |  index_names (level_0 to level_n) ...      | column_blanks ...         |
           +----------------------------+---------------+---------------------------+

      Parameters
      ----------
      sparsify_cols : bool
          Whether column_headers section will add colspan attributes (>1) to elements.
      max_cols : int
          Maximum number of columns to render. If exceeded will contain `...` filler.

      Returns
      -------
      head : list
          One list of elements per non-hidden column level, followed by the
          index-names row when index names are to be shown.
      """
      # for sparsifying a MultiIndex
      col_lengths = _get_level_lengths(
          self.columns, sparsify_cols, max_cols, self.hidden_columns
      )

      # transpose the column labels into one tuple of labels per level
      raw_labels = self.data.columns.tolist()
      if self.data.columns.nlevels == 1:
          raw_labels = [[label] for label in raw_labels]
      clabels = list(zip(*raw_labels))

      # 1) column headers
      head = [
          self._generate_col_header_row((level, clabels), max_cols, col_lengths)
          for level, hidden in enumerate(self.hide_columns_)
          if clabels and not hidden
      ]

      # 2) index names
      index_names = self.data.index.names
      show_index_names = (
          index_names
          and com.any_not_none(*index_names)
          and not (all(self.hide_index_) or self.hide_index_names)
      )
      if show_index_names:
          head.append(self._generate_index_names_row(clabels, max_cols, col_lengths))

      return head
  def _generate_col_header_row(self, iter: tuple, max_cols: int, col_lengths: dict):
      """
      Generate the row containing column headers:

       +----------------------------+---------------+---------------------------+
       |  index_blanks ...          | column_name_i |  column_headers (level_i) |
       +----------------------------+---------------+---------------------------+

      Parameters
      ----------
      iter : tuple
          Looping variables from outer scope: the column level number and the
          per-level column labels.
      max_cols : int
          Permissible number of columns
      col_lengths : dict
          Looked up with ``(level, column position)`` keys to get the span of a
          header; a missing key counts as 0.

      Returns
      -------
      list of elements
      """
      r, clabels = iter
      css = self.css

      # number of index blanks is governed by number of hidden index levels;
      # the same element object is repeated
      blank_th = _element("th", css["blank"], css["blank_value"], True)
      n_blanks = self.index.nlevels - sum(self.hide_index_) - 1
      index_blanks = [blank_th] * n_blanks

      name = self.data.columns.names[r]
      if name is None:
          name_css = f"{css['blank']} {css['level']}{r}"
      else:
          name_css = f"{css['index_name']} {css['level']}{r}"
      if name is not None and not self.hide_column_names:
          name_value = name
      else:
          name_value = css["blank_value"]
      column_name = [_element("th", name_css, name_value, not all(self.hide_index_))]

      column_headers: list = []
      visible_col_count: int = 0
      for c, value in enumerate(clabels[r]):
          span = col_lengths.get((r, c), 0)
          visible = _is_visible(c, r, col_lengths)
          if visible:
              visible_col_count += span

          heading_css = f"{css['col_heading']} {css['level']}{r}"
          if self._check_trim(
              visible_col_count,
              max_cols,
              column_headers,
              "th",
              f"{heading_css} {css['col_trim']}",
          ):
              break

          cell = _element(
              "th",
              f"{heading_css} {css['col']}{c}",
              value,
              visible,
              display_value=self._display_funcs_columns[(r, c)](value),
              attributes=f'colspan="{span}"' if span > 1 else "",
          )

          cell_id = f"{css['level']}{r}_{css['col']}{c}"
          if self.cell_ids:
              cell["id"] = cell_id
          styles = self.ctx_columns.get((r, c))
          if visible and styles:
              # a styled header always needs an id so the style can target it
              cell["id"] = cell_id
              self.cellstyle_map_columns[tuple(styles)].append(cell_id)

          column_headers.append(cell)

      return index_blanks + column_name + column_headers
  def _generate_index_names_row(self, iter: tuple, max_cols: int, col_lengths: dict):
      """
      Generate the row containing index names

       +----------------------------+---------------+---------------------------+
       |  index_names (level_0 to level_n) ...      | column_blanks ...         |
       +----------------------------+---------------+---------------------------+

      Parameters
      ----------
      iter : tuple
          The per-level column labels from the outer scope. When empty, no
          column blanks are produced.
      max_cols : int
          Permissible number of columns
      col_lengths : dict
          Passed to ``_is_visible`` for the last column level.

      Returns
      -------
      list of elements
      """
      clabels = iter
      css = self.css

      index_names = []
      for level, name in enumerate(self.data.index.names):
          shown = css["blank_value"] if name is None else name
          index_names.append(
              _element(
                  "th",
                  f"{css['index_name']} {css['level']}{level}",
                  shown,
                  not self.hide_index_[level],
              )
          )

      column_blanks: list = []
      if not clabels:
          return index_names + column_blanks

      visible_col_count: int = 0
      last_level = self.columns.nlevels - 1  # use last level since never sparsed
      for c, _ in enumerate(clabels[last_level]):
          if _is_visible(c, last_level, col_lengths):
              visible_col_count += 1
          if self._check_trim(
              visible_col_count,
              max_cols,
              column_blanks,
              "th",
              f"{css['blank']} {css['col']}{c} {css['col_trim']}",
              css["blank_value"],
          ):
              break

          column_blanks.append(
              _element(
                  "th",
                  f"{css['blank']} {css['col']}{c}",
                  css["blank_value"],
                  c not in self.hidden_columns,
              )
          )

      return index_names + column_blanks
  514. def _translate_body(self, idx_lengths: dict, max_rows: int, max_cols: int):
  515. """
  516. Build each <tr> within table <body> as a list
  517. Use the following structure:
  518. +--------------------------------------------+---------------------------+
  519. | index_header_0 ... index_header_n | data_by_column ... |
  520. +--------------------------------------------+---------------------------+
  521. Also add elements to the cellstyle_map for more efficient grouped elements in
  522. <style></style> block
  523. Parameters
  524. ----------
  525. sparsify_index : bool
  526. Whether index_headers section will add rowspan attributes (>1) to elements.
  527. Returns
  528. -------
  529. body : list
  530. The associated HTML elements needed for template rendering.
  531. """
  532. rlabels = self.data.index.tolist()
  533. if not isinstance(self.data.index, MultiIndex):
  534. rlabels = [[x] for x in rlabels]
  535. body: list = []
  536. visible_row_count: int = 0
  537. for r, row_tup in [
  538. z for z in enumerate(self.data.itertuples()) if z[0] not in self.hidden_rows
  539. ]:
  540. visible_row_count += 1
  541. if self._check_trim(
  542. visible_row_count,
  543. max_rows,
  544. body,
  545. "row",
  546. ):
  547. break
  548. body_row = self._generate_body_row(
  549. (r, row_tup, rlabels), max_cols, idx_lengths
  550. )
  551. body.append(body_row)
  552. return body
  553. def _check_trim(
  554. self,
  555. count: int,
  556. max: int,
  557. obj: list,
  558. element: str,
  559. css: str | None = None,
  560. value: str = "...",
  561. ) -> bool:
  562. """
  563. Indicates whether to break render loops and append a trimming indicator
  564. Parameters
  565. ----------
  566. count : int
  567. The loop count of previous visible items.
  568. max : int
  569. The allowable rendered items in the loop.
  570. obj : list
  571. The current render collection of the rendered items.
  572. element : str
  573. The type of element to append in the case a trimming indicator is needed.
  574. css : str, optional
  575. The css to add to the trimming indicator element.
  576. value : str, optional
  577. The value of the elements display if necessary.
  578. Returns
  579. -------
  580. result : bool
  581. Whether a trimming element was required and appended.
  582. """
  583. if count > max:
  584. if element == "row":
  585. obj.append(self._generate_trimmed_row(max))
  586. else:
  587. obj.append(_element(element, css, value, True, attributes=""))
  588. return True
  589. return False
  def _generate_trimmed_row(self, max_cols: int) -> list:
      """
      When a render has too many rows we generate a trimming row containing "..."

      Parameters
      ----------
      max_cols : int
          Number of permissible columns

      Returns
      -------
      list of elements
          One "..." header per index level followed by one "..." cell per
          column, cut short by a column-trimming cell once ``max_cols`` visible
          columns are exceeded.
      """
      css = self.css

      index_headers = []
      for level in range(self.data.index.nlevels):
          index_headers.append(
              _element(
                  "th",
                  f"{css['row_heading']} {css['level']}{level} {css['row_trim']}",
                  "...",
                  not self.hide_index_[level],
                  attributes="",
              )
          )

      data: list = []
      visible_col_count: int = 0
      for c in range(len(self.columns)):
          is_shown = c not in self.hidden_columns
          if is_shown:
              visible_col_count += 1
          if self._check_trim(
              visible_col_count,
              max_cols,
              data,
              "td",
              f"{css['data']} {css['row_trim']} {css['col_trim']}",
          ):
              break

          data.append(
              _element(
                  "td",
                  f"{css['data']} {css['col']}{c} {css['row_trim']}",
                  "...",
                  is_shown,
                  attributes="",
              )
          )

      return index_headers + data
  def _generate_body_row(
      self,
      iter: tuple,
      max_cols: int,
      idx_lengths: dict,
  ):
      """
      Generate a regular row for the body section of appropriate format.

        +--------------------------------------------+---------------------------+
        |  index_header_0    ...    index_header_n   |  data_by_column   ...     |
        +--------------------------------------------+---------------------------+

      Parameters
      ----------
      iter : tuple
          Iterable from outer scope: row number, row data tuple, row index labels.
          The first item of the row data tuple is skipped when building cells.
      max_cols : int
          Number of permissible columns.
      idx_lengths : dict
          A map of the sparsification structure of the index

      Returns
      -------
      list of elements
      """
      r, row_tup, rlabels = iter
      css = self.css

      index_headers = []
      for c, value in enumerate(rlabels[r]):
          visible = _is_visible(r, c, idx_lengths) and not self.hide_index_[c]
          rowspan = idx_lengths.get((c, r), 0)
          header = _element(
              "th",
              f"{css['row_heading']} {css['level']}{c} {css['row']}{r}",
              value,
              visible,
              display_value=self._display_funcs_index[(r, c)](value),
              attributes=f'rowspan="{rowspan}"' if rowspan > 1 else "",
          )

          header_id = f"{css['level']}{c}_{css['row']}{r}"
          if self.cell_ids:
              header["id"] = header_id  # id is given
          header_styles = self.ctx_index.get((r, c))
          if visible and header_styles:
              # always add id if a style is specified
              header["id"] = header_id
              self.cellstyle_map_index[tuple(header_styles)].append(header_id)

          index_headers.append(header)

      data: list = []
      visible_col_count: int = 0
      for c, value in enumerate(row_tup[1:]):
          visible = not (c in self.hidden_columns or r in self.hidden_rows)
          if visible:
              visible_col_count += 1

          if self._check_trim(
              visible_col_count,
              max_cols,
              data,
              "td",
              f"{css['data']} {css['row']}{r} {css['col_trim']}",
          ):
              break

          # add custom classes from cell context
          extra_cls = (
              " " + self.cell_context[r, c] if (r, c) in self.cell_context else ""
          )
          cell = _element(
              "td",
              f"{css['data']} {css['row']}{r} {css['col']}{c}{extra_cls}",
              value,
              visible,
              attributes="",
              display_value=self._display_funcs[(r, c)](value),
          )

          cell_id = f"{css['row']}{r}_{css['col']}{c}"
          if self.cell_ids:
              cell["id"] = cell_id
          cell_styles = self.ctx.get((r, c))
          if visible and cell_styles:
              # always add id if needed due to specified style
              cell["id"] = cell_id
              self.cellstyle_map[tuple(cell_styles)].append(cell_id)

          data.append(cell)

      return index_headers + data
  738. def _translate_latex(self, d: dict, clines: str | None) -> None:
  739. r"""
  740. Post-process the default render dict for the LaTeX template format.
  741. Processing items included are:
  742. - Remove hidden columns from the non-headers part of the body.
  743. - Place cellstyles directly in td cells rather than use cellstyle_map.
  744. - Remove hidden indexes or reinsert missing th elements if part of multiindex
  745. or multirow sparsification (so that \multirow and \multicol work correctly).
  746. """
  747. index_levels = self.index.nlevels
  748. visible_index_level_n = index_levels - sum(self.hide_index_)
  749. d["head"] = [
  750. [
  751. {**col, "cellstyle": self.ctx_columns[r, c - visible_index_level_n]}
  752. for c, col in enumerate(row)
  753. if col["is_visible"]
  754. ]
  755. for r, row in enumerate(d["head"])
  756. ]
  757. def _concatenated_visible_rows(obj, n, row_indices):
  758. """
  759. Extract all visible row indices recursively from concatenated stylers.
  760. """
  761. row_indices.extend(
  762. [r + n for r in range(len(obj.index)) if r not in obj.hidden_rows]
  763. )
  764. n += len(obj.index)
  765. for concatenated in obj.concatenated:
  766. n = _concatenated_visible_rows(concatenated, n, row_indices)
  767. return n
  768. def concatenated_visible_rows(obj):
  769. row_indices: list[int] = []
  770. _concatenated_visible_rows(obj, 0, row_indices)
  771. # TODO try to consolidate the concat visible rows
  772. # methods to a single function / recursion for simplicity
  773. return row_indices
  774. body = []
  775. for r, row in zip(concatenated_visible_rows(self), d["body"]):
  776. # note: cannot enumerate d["body"] because rows were dropped if hidden
  777. # during _translate_body so must zip to acquire the true r-index associated
  778. # with the ctx obj which contains the cell styles.
  779. if all(self.hide_index_):
  780. row_body_headers = []
  781. else:
  782. row_body_headers = [
  783. {
  784. **col,
  785. "display_value": col["display_value"]
  786. if col["is_visible"]
  787. else "",
  788. "cellstyle": self.ctx_index[r, c],
  789. }
  790. for c, col in enumerate(row[:index_levels])
  791. if (col["type"] == "th" and not self.hide_index_[c])
  792. ]
  793. row_body_cells = [
  794. {**col, "cellstyle": self.ctx[r, c]}
  795. for c, col in enumerate(row[index_levels:])
  796. if (col["is_visible"] and col["type"] == "td")
  797. ]
  798. body.append(row_body_headers + row_body_cells)
  799. d["body"] = body
  800. # clines are determined from info on index_lengths and hidden_rows and input
  801. # to a dict defining which row clines should be added in the template.
  802. if clines not in [
  803. None,
  804. "all;data",
  805. "all;index",
  806. "skip-last;data",
  807. "skip-last;index",
  808. ]:
  809. raise ValueError(
  810. f"`clines` value of {clines} is invalid. Should either be None or one "
  811. f"of 'all;data', 'all;index', 'skip-last;data', 'skip-last;index'."
  812. )
  813. if clines is not None:
  814. data_len = len(row_body_cells) if "data" in clines and d["body"] else 0
  815. d["clines"] = defaultdict(list)
  816. visible_row_indexes: list[int] = [
  817. r for r in range(len(self.data.index)) if r not in self.hidden_rows
  818. ]
  819. visible_index_levels: list[int] = [
  820. i for i in range(index_levels) if not self.hide_index_[i]
  821. ]
  822. for rn, r in enumerate(visible_row_indexes):
  823. for lvln, lvl in enumerate(visible_index_levels):
  824. if lvl == index_levels - 1 and "skip-last" in clines:
  825. continue
  826. idx_len = d["index_lengths"].get((lvl, r), None)
  827. if idx_len is not None: # i.e. not a sparsified entry
  828. d["clines"][rn + idx_len].append(
  829. f"\\cline{{{lvln+1}-{len(visible_index_levels)+data_len}}}"
  830. )
  831. def format(
  832. self,
  833. formatter: ExtFormatter | None = None,
  834. subset: Subset | None = None,
  835. na_rep: str | None = None,
  836. precision: int | None = None,
  837. decimal: str = ".",
  838. thousands: str | None = None,
  839. escape: str | None = None,
  840. hyperlinks: str | None = None,
  841. ) -> StylerRenderer:
  842. r"""
  843. Format the text display value of cells.
  844. Parameters
  845. ----------
  846. formatter : str, callable, dict or None
  847. Object to define how values are displayed. See notes.
  848. subset : label, array-like, IndexSlice, optional
  849. A valid 2d input to `DataFrame.loc[<subset>]`, or, in the case of a 1d input
  850. or single key, to `DataFrame.loc[:, <subset>]` where the columns are
  851. prioritised, to limit ``data`` to *before* applying the function.
  852. na_rep : str, optional
  853. Representation for missing values.
  854. If ``na_rep`` is None, no special formatting is applied.
  855. precision : int, optional
  856. Floating point precision to use for display purposes, if not determined by
  857. the specified ``formatter``.
  858. .. versionadded:: 1.3.0
  859. decimal : str, default "."
  860. Character used as decimal separator for floats, complex and integers.
  861. .. versionadded:: 1.3.0
  862. thousands : str, optional, default None
  863. Character used as thousands separator for floats, complex and integers.
  864. .. versionadded:: 1.3.0
  865. escape : str, optional
  866. Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
  867. in cell display string with HTML-safe sequences.
  868. Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
  869. ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
  870. LaTeX-safe sequences.
  871. Escaping is done before ``formatter``.
  872. .. versionadded:: 1.3.0
  873. hyperlinks : {"html", "latex"}, optional
  874. Convert string patterns containing https://, http://, ftp:// or www. to
  875. HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
  876. commands if "latex".
  877. .. versionadded:: 1.4.0
  878. Returns
  879. -------
  880. Styler
  881. See Also
  882. --------
  883. Styler.format_index: Format the text display value of index labels.
  884. Notes
  885. -----
  886. This method assigns a formatting function, ``formatter``, to each cell in the
  887. DataFrame. If ``formatter`` is ``None``, then the default formatter is used.
  888. If a callable then that function should take a data value as input and return
  889. a displayable representation, such as a string. If ``formatter`` is
  890. given as a string this is assumed to be a valid Python format specification
  891. and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
  892. keys should correspond to column names, and values should be string or
  893. callable, as above.
  894. The default formatter currently expresses floats and complex numbers with the
  895. pandas display precision unless using the ``precision`` argument here. The
  896. default formatter does not adjust the representation of missing values unless
  897. the ``na_rep`` argument is used.
  898. The ``subset`` argument defines which region to apply the formatting function
  899. to. If the ``formatter`` argument is given in dict form but does not include
  900. all columns within the subset then these columns will have the default formatter
  901. applied. Any columns in the formatter dict excluded from the subset will
  902. be ignored.
  903. When using a ``formatter`` string the dtypes must be compatible, otherwise a
  904. `ValueError` will be raised.
  905. When instantiating a Styler, default formatting can be applied be setting the
  906. ``pandas.options``:
  907. - ``styler.format.formatter``: default None.
  908. - ``styler.format.na_rep``: default None.
  909. - ``styler.format.precision``: default 6.
  910. - ``styler.format.decimal``: default ".".
  911. - ``styler.format.thousands``: default None.
  912. - ``styler.format.escape``: default None.
  913. .. warning::
  914. `Styler.format` is ignored when using the output format `Styler.to_excel`,
  915. since Excel and Python have inherrently different formatting structures.
  916. However, it is possible to use the `number-format` pseudo CSS attribute
  917. to force Excel permissible formatting. See examples.
  918. Examples
  919. --------
  920. Using ``na_rep`` and ``precision`` with the default ``formatter``
  921. >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]])
  922. >>> df.style.format(na_rep='MISS', precision=3) # doctest: +SKIP
  923. 0 1 2
  924. 0 MISS 1.000 A
  925. 1 2.000 MISS 3.000
  926. Using a ``formatter`` specification on consistent column dtypes
  927. >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) # doctest: +SKIP
  928. 0 1 2
  929. 0 MISS 1.00 A
  930. 1 2.00 MISS 3.000000
  931. Using the default ``formatter`` for unspecified columns
  932. >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1)
  933. ... # doctest: +SKIP
  934. 0 1 2
  935. 0 MISS £ 1.0 A
  936. 1 2.00 MISS 3.0
  937. Multiple ``na_rep`` or ``precision`` specifications under the default
  938. ``formatter``.
  939. >>> (df.style.format(na_rep='MISS', precision=1, subset=[0])
  940. ... .format(na_rep='PASS', precision=2, subset=[1, 2])) # doctest: +SKIP
  941. 0 1 2
  942. 0 MISS 1.00 A
  943. 1 2.0 PASS 3.00
  944. Using a callable ``formatter`` function.
  945. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
  946. >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS')
  947. ... # doctest: +SKIP
  948. 0 1 2
  949. 0 MISS 1.0000 STRING
  950. 1 2.0 MISS FLOAT
  951. Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
  952. >>> df = pd.DataFrame([['<div></div>', '"A&B"', None]])
  953. >>> s = df.style.format(
  954. ... '<a href="a.com/{0}">{0}</a>', escape="html", na_rep="NA"
  955. ... )
  956. >>> s.to_html() # doctest: +SKIP
  957. ...
  958. <td .. ><a href="a.com/&lt;div&gt;&lt;/div&gt;">&lt;div&gt;&lt;/div&gt;</a></td>
  959. <td .. ><a href="a.com/&#34;A&amp;B&#34;">&#34;A&amp;B&#34;</a></td>
  960. <td .. >NA</td>
  961. ...
  962. Using a ``formatter`` with LaTeX ``escape``.
  963. >>> df = pd.DataFrame([["123"], ["~ ^"], ["$%#"]])
  964. >>> df.style.format("\\textbf{{{}}}", escape="latex").to_latex()
  965. ... # doctest: +SKIP
  966. \begin{tabular}{ll}
  967. {} & {0} \\
  968. 0 & \textbf{123} \\
  969. 1 & \textbf{\textasciitilde \space \textasciicircum } \\
  970. 2 & \textbf{\$\%\#} \\
  971. \end{tabular}
  972. Pandas defines a `number-format` pseudo CSS attribute instead of the `.format`
  973. method to create `to_excel` permissible formatting. Note that semi-colons are
  974. CSS protected characters but used as separators in Excel's format string.
  975. Replace semi-colons with the section separator character (ASCII-245) when
  976. defining the formatting here.
  977. >>> df = pd.DataFrame({"A": [1, 0, -1]})
  978. >>> pseudo_css = "number-format: 0§[Red](0)§-§@;"
  979. >>> filename = "formatted_file.xlsx"
  980. >>> df.style.applymap(lambda v: pseudo_css).to_excel(filename) # doctest: +SKIP
  981. .. figure:: ../../_static/style/format_excel_css.png
  982. """
  983. if all(
  984. (
  985. formatter is None,
  986. subset is None,
  987. precision is None,
  988. decimal == ".",
  989. thousands is None,
  990. na_rep is None,
  991. escape is None,
  992. hyperlinks is None,
  993. )
  994. ):
  995. self._display_funcs.clear()
  996. return self # clear the formatter / revert to default and avoid looping
  997. subset = slice(None) if subset is None else subset
  998. subset = non_reducing_slice(subset)
  999. data = self.data.loc[subset]
  1000. if not isinstance(formatter, dict):
  1001. formatter = {col: formatter for col in data.columns}
  1002. cis = self.columns.get_indexer_for(data.columns)
  1003. ris = self.index.get_indexer_for(data.index)
  1004. for ci in cis:
  1005. format_func = _maybe_wrap_formatter(
  1006. formatter.get(self.columns[ci]),
  1007. na_rep=na_rep,
  1008. precision=precision,
  1009. decimal=decimal,
  1010. thousands=thousands,
  1011. escape=escape,
  1012. hyperlinks=hyperlinks,
  1013. )
  1014. for ri in ris:
  1015. self._display_funcs[(ri, ci)] = format_func
  1016. return self
  1017. def format_index(
  1018. self,
  1019. formatter: ExtFormatter | None = None,
  1020. axis: Axis = 0,
  1021. level: Level | list[Level] | None = None,
  1022. na_rep: str | None = None,
  1023. precision: int | None = None,
  1024. decimal: str = ".",
  1025. thousands: str | None = None,
  1026. escape: str | None = None,
  1027. hyperlinks: str | None = None,
  1028. ) -> StylerRenderer:
  1029. r"""
  1030. Format the text display value of index labels or column headers.
  1031. .. versionadded:: 1.4.0
  1032. Parameters
  1033. ----------
  1034. formatter : str, callable, dict or None
  1035. Object to define how values are displayed. See notes.
  1036. axis : {0, "index", 1, "columns"}
  1037. Whether to apply the formatter to the index or column headers.
  1038. level : int, str, list
  1039. The level(s) over which to apply the generic formatter.
  1040. na_rep : str, optional
  1041. Representation for missing values.
  1042. If ``na_rep`` is None, no special formatting is applied.
  1043. precision : int, optional
  1044. Floating point precision to use for display purposes, if not determined by
  1045. the specified ``formatter``.
  1046. decimal : str, default "."
  1047. Character used as decimal separator for floats, complex and integers.
  1048. thousands : str, optional, default None
  1049. Character used as thousands separator for floats, complex and integers.
  1050. escape : str, optional
  1051. Use 'html' to replace the characters ``&``, ``<``, ``>``, ``'``, and ``"``
  1052. in cell display string with HTML-safe sequences.
  1053. Use 'latex' to replace the characters ``&``, ``%``, ``$``, ``#``, ``_``,
  1054. ``{``, ``}``, ``~``, ``^``, and ``\`` in the cell display string with
  1055. LaTeX-safe sequences.
  1056. Escaping is done before ``formatter``.
  1057. hyperlinks : {"html", "latex"}, optional
  1058. Convert string patterns containing https://, http://, ftp:// or www. to
  1059. HTML <a> tags as clickable URL hyperlinks if "html", or LaTeX \href
  1060. commands if "latex".
  1061. Returns
  1062. -------
  1063. Styler
  1064. See Also
  1065. --------
  1066. Styler.format: Format the text display value of data cells.
  1067. Notes
  1068. -----
  1069. This method assigns a formatting function, ``formatter``, to each level label
  1070. in the DataFrame's index or column headers. If ``formatter`` is ``None``,
  1071. then the default formatter is used.
  1072. If a callable then that function should take a label value as input and return
  1073. a displayable representation, such as a string. If ``formatter`` is
  1074. given as a string this is assumed to be a valid Python format specification
  1075. and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given,
  1076. keys should correspond to MultiIndex level numbers or names, and values should
  1077. be string or callable, as above.
  1078. The default formatter currently expresses floats and complex numbers with the
  1079. pandas display precision unless using the ``precision`` argument here. The
  1080. default formatter does not adjust the representation of missing values unless
  1081. the ``na_rep`` argument is used.
  1082. The ``level`` argument defines which levels of a MultiIndex to apply the
  1083. method to. If the ``formatter`` argument is given in dict form but does
  1084. not include all levels within the level argument then these unspecified levels
  1085. will have the default formatter applied. Any levels in the formatter dict
  1086. specifically excluded from the level argument will be ignored.
  1087. When using a ``formatter`` string the dtypes must be compatible, otherwise a
  1088. `ValueError` will be raised.
  1089. .. warning::
  1090. `Styler.format_index` is ignored when using the output format
  1091. `Styler.to_excel`, since Excel and Python have inherrently different
  1092. formatting structures.
  1093. However, it is possible to use the `number-format` pseudo CSS attribute
  1094. to force Excel permissible formatting. See documentation for `Styler.format`.
  1095. Examples
  1096. --------
  1097. Using ``na_rep`` and ``precision`` with the default ``formatter``
  1098. >>> df = pd.DataFrame([[1, 2, 3]], columns=[2.0, np.nan, 4.0])
  1099. >>> df.style.format_index(axis=1, na_rep='MISS', precision=3) # doctest: +SKIP
  1100. 2.000 MISS 4.000
  1101. 0 1 2 3
  1102. Using a ``formatter`` specification on consistent dtypes in a level
  1103. >>> df.style.format_index('{:.2f}', axis=1, na_rep='MISS') # doctest: +SKIP
  1104. 2.00 MISS 4.00
  1105. 0 1 2 3
  1106. Using the default ``formatter`` for unspecified levels
  1107. >>> df = pd.DataFrame([[1, 2, 3]],
  1108. ... columns=pd.MultiIndex.from_arrays([["a", "a", "b"],[2, np.nan, 4]]))
  1109. >>> df.style.format_index({0: lambda v: upper(v)}, axis=1, precision=1)
  1110. ... # doctest: +SKIP
  1111. A B
  1112. 2.0 nan 4.0
  1113. 0 1 2 3
  1114. Using a callable ``formatter`` function.
  1115. >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT'
  1116. >>> df.style.format_index(func, axis=1, na_rep='MISS')
  1117. ... # doctest: +SKIP
  1118. STRING STRING
  1119. FLOAT MISS FLOAT
  1120. 0 1 2 3
  1121. Using a ``formatter`` with HTML ``escape`` and ``na_rep``.
  1122. >>> df = pd.DataFrame([[1, 2, 3]], columns=['"A"', 'A&B', None])
  1123. >>> s = df.style.format_index('$ {0}', axis=1, escape="html", na_rep="NA")
  1124. ... # doctest: +SKIP
  1125. <th .. >$ &#34;A&#34;</th>
  1126. <th .. >$ A&amp;B</th>
  1127. <th .. >NA</td>
  1128. ...
  1129. Using a ``formatter`` with LaTeX ``escape``.
  1130. >>> df = pd.DataFrame([[1, 2, 3]], columns=["123", "~", "$%#"])
  1131. >>> df.style.format_index("\\textbf{{{}}}", escape="latex", axis=1).to_latex()
  1132. ... # doctest: +SKIP
  1133. \begin{tabular}{lrrr}
  1134. {} & {\textbf{123}} & {\textbf{\textasciitilde }} & {\textbf{\$\%\#}} \\
  1135. 0 & 1 & 2 & 3 \\
  1136. \end{tabular}
  1137. """
  1138. axis = self.data._get_axis_number(axis)
  1139. if axis == 0:
  1140. display_funcs_, obj = self._display_funcs_index, self.index
  1141. else:
  1142. display_funcs_, obj = self._display_funcs_columns, self.columns
  1143. levels_ = refactor_levels(level, obj)
  1144. if all(
  1145. (
  1146. formatter is None,
  1147. level is None,
  1148. precision is None,
  1149. decimal == ".",
  1150. thousands is None,
  1151. na_rep is None,
  1152. escape is None,
  1153. hyperlinks is None,
  1154. )
  1155. ):
  1156. display_funcs_.clear()
  1157. return self # clear the formatter / revert to default and avoid looping
  1158. if not isinstance(formatter, dict):
  1159. formatter = {level: formatter for level in levels_}
  1160. else:
  1161. formatter = {
  1162. obj._get_level_number(level): formatter_
  1163. for level, formatter_ in formatter.items()
  1164. }
  1165. for lvl in levels_:
  1166. format_func = _maybe_wrap_formatter(
  1167. formatter.get(lvl),
  1168. na_rep=na_rep,
  1169. precision=precision,
  1170. decimal=decimal,
  1171. thousands=thousands,
  1172. escape=escape,
  1173. hyperlinks=hyperlinks,
  1174. )
  1175. for idx in [(i, lvl) if axis == 0 else (lvl, i) for i in range(len(obj))]:
  1176. display_funcs_[idx] = format_func
  1177. return self
  1178. def relabel_index(
  1179. self,
  1180. labels: Sequence | Index,
  1181. axis: Axis = 0,
  1182. level: Level | list[Level] | None = None,
  1183. ) -> StylerRenderer:
  1184. r"""
  1185. Relabel the index, or column header, keys to display a set of specified values.
  1186. .. versionadded:: 1.5.0
  1187. Parameters
  1188. ----------
  1189. labels : list-like or Index
  1190. New labels to display. Must have same length as the underlying values not
  1191. hidden.
  1192. axis : {"index", 0, "columns", 1}
  1193. Apply to the index or columns.
  1194. level : int, str, list, optional
  1195. The level(s) over which to apply the new labels. If `None` will apply
  1196. to all levels of an Index or MultiIndex which are not hidden.
  1197. Returns
  1198. -------
  1199. Styler
  1200. See Also
  1201. --------
  1202. Styler.format_index: Format the text display value of index or column headers.
  1203. Styler.hide: Hide the index, column headers, or specified data from display.
  1204. Notes
  1205. -----
  1206. As part of Styler, this method allows the display of an index to be
  1207. completely user-specified without affecting the underlying DataFrame data,
  1208. index, or column headers. This means that the flexibility of indexing is
  1209. maintained whilst the final display is customisable.
  1210. Since Styler is designed to be progressively constructed with method chaining,
  1211. this method is adapted to react to the **currently specified hidden elements**.
  1212. This is useful because it means one does not have to specify all the new
  1213. labels if the majority of an index, or column headers, have already been hidden.
  1214. The following produce equivalent display (note the length of ``labels`` in
  1215. each case).
  1216. .. code-block:: python
  1217. # relabel first, then hide
  1218. df = pd.DataFrame({"col": ["a", "b", "c"]})
  1219. df.style.relabel_index(["A", "B", "C"]).hide([0,1])
  1220. # hide first, then relabel
  1221. df = pd.DataFrame({"col": ["a", "b", "c"]})
  1222. df.style.hide([0,1]).relabel_index(["C"])
  1223. This method should be used, rather than :meth:`Styler.format_index`, in one of
  1224. the following cases (see examples):
  1225. - A specified set of labels are required which are not a function of the
  1226. underlying index keys.
  1227. - The function of the underlying index keys requires a counter variable,
  1228. such as those available upon enumeration.
  1229. Examples
  1230. --------
  1231. Basic use
  1232. >>> df = pd.DataFrame({"col": ["a", "b", "c"]})
  1233. >>> df.style.relabel_index(["A", "B", "C"]) # doctest: +SKIP
  1234. col
  1235. A a
  1236. B b
  1237. C c
  1238. Chaining with pre-hidden elements
  1239. >>> df.style.hide([0,1]).relabel_index(["C"]) # doctest: +SKIP
  1240. col
  1241. C c
  1242. Using a MultiIndex
  1243. >>> midx = pd.MultiIndex.from_product([[0, 1], [0, 1], [0, 1]])
  1244. >>> df = pd.DataFrame({"col": list(range(8))}, index=midx)
  1245. >>> styler = df.style # doctest: +SKIP
  1246. col
  1247. 0 0 0 0
  1248. 1 1
  1249. 1 0 2
  1250. 1 3
  1251. 1 0 0 4
  1252. 1 5
  1253. 1 0 6
  1254. 1 7
  1255. >>> styler.hide((midx.get_level_values(0)==0)|(midx.get_level_values(1)==0))
  1256. ... # doctest: +SKIP
  1257. >>> styler.hide(level=[0,1]) # doctest: +SKIP
  1258. >>> styler.relabel_index(["binary6", "binary7"]) # doctest: +SKIP
  1259. col
  1260. binary6 6
  1261. binary7 7
  1262. We can also achieve the above by indexing first and then re-labeling
  1263. >>> styler = df.loc[[(1,1,0), (1,1,1)]].style
  1264. >>> styler.hide(level=[0,1]).relabel_index(["binary6", "binary7"])
  1265. ... # doctest: +SKIP
  1266. col
  1267. binary6 6
  1268. binary7 7
  1269. Defining a formatting function which uses an enumeration counter. Also note
  1270. that the value of the index key is passed in the case of string labels so it
  1271. can also be inserted into the label, using curly brackets (or double curly
  1272. brackets if the string if pre-formatted),
  1273. >>> df = pd.DataFrame({"samples": np.random.rand(10)})
  1274. >>> styler = df.loc[np.random.randint(0,10,3)].style
  1275. >>> styler.relabel_index([f"sample{i+1} ({{}})" for i in range(3)])
  1276. ... # doctest: +SKIP
  1277. samples
  1278. sample1 (5) 0.315811
  1279. sample2 (0) 0.495941
  1280. sample3 (2) 0.067946
  1281. """
  1282. axis = self.data._get_axis_number(axis)
  1283. if axis == 0:
  1284. display_funcs_, obj = self._display_funcs_index, self.index
  1285. hidden_labels, hidden_lvls = self.hidden_rows, self.hide_index_
  1286. else:
  1287. display_funcs_, obj = self._display_funcs_columns, self.columns
  1288. hidden_labels, hidden_lvls = self.hidden_columns, self.hide_columns_
  1289. visible_len = len(obj) - len(set(hidden_labels))
  1290. if len(labels) != visible_len:
  1291. raise ValueError(
  1292. "``labels`` must be of length equal to the number of "
  1293. f"visible labels along ``axis`` ({visible_len})."
  1294. )
  1295. if level is None:
  1296. level = [i for i in range(obj.nlevels) if not hidden_lvls[i]]
  1297. levels_ = refactor_levels(level, obj)
  1298. def alias_(x, value):
  1299. if isinstance(value, str):
  1300. return value.format(x)
  1301. return value
  1302. for ai, i in enumerate([i for i in range(len(obj)) if i not in hidden_labels]):
  1303. if len(levels_) == 1:
  1304. idx = (i, levels_[0]) if axis == 0 else (levels_[0], i)
  1305. display_funcs_[idx] = partial(alias_, value=labels[ai])
  1306. else:
  1307. for aj, lvl in enumerate(levels_):
  1308. idx = (i, lvl) if axis == 0 else (lvl, i)
  1309. display_funcs_[idx] = partial(alias_, value=labels[ai][aj])
  1310. return self
  1311. def _element(
  1312. html_element: str,
  1313. html_class: str | None,
  1314. value: Any,
  1315. is_visible: bool,
  1316. **kwargs,
  1317. ) -> dict:
  1318. """
  1319. Template to return container with information for a <td></td> or <th></th> element.
  1320. """
  1321. if "display_value" not in kwargs:
  1322. kwargs["display_value"] = value
  1323. return {
  1324. "type": html_element,
  1325. "value": value,
  1326. "class": html_class,
  1327. "is_visible": is_visible,
  1328. **kwargs,
  1329. }
  1330. def _get_trimming_maximums(
  1331. rn,
  1332. cn,
  1333. max_elements,
  1334. max_rows=None,
  1335. max_cols=None,
  1336. scaling_factor: float = 0.8,
  1337. ) -> tuple[int, int]:
  1338. """
  1339. Recursively reduce the number of rows and columns to satisfy max elements.
  1340. Parameters
  1341. ----------
  1342. rn, cn : int
  1343. The number of input rows / columns
  1344. max_elements : int
  1345. The number of allowable elements
  1346. max_rows, max_cols : int, optional
  1347. Directly specify an initial maximum rows or columns before compression.
  1348. scaling_factor : float
  1349. Factor at which to reduce the number of rows / columns to fit.
  1350. Returns
  1351. -------
  1352. rn, cn : tuple
  1353. New rn and cn values that satisfy the max_elements constraint
  1354. """
  1355. def scale_down(rn, cn):
  1356. if cn >= rn:
  1357. return rn, int(cn * scaling_factor)
  1358. else:
  1359. return int(rn * scaling_factor), cn
  1360. if max_rows:
  1361. rn = max_rows if rn > max_rows else rn
  1362. if max_cols:
  1363. cn = max_cols if cn > max_cols else cn
  1364. while rn * cn > max_elements:
  1365. rn, cn = scale_down(rn, cn)
  1366. return rn, cn
def _get_level_lengths(
    index: Index,
    sparsify: bool,
    max_index: int,
    hidden_elements: Sequence[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Given an index, find the level length for each element.

    Parameters
    ----------
    index : Index
        Index or columns to determine lengths of each element
    sparsify : bool
        Whether to hide or show each distinct element in a MultiIndex
    max_index : int
        The maximum number of elements to analyse along the index due to trimming
    hidden_elements : sequence of int
        Index positions of elements hidden from display in the index affecting
        length

    Returns
    -------
    Dict :
        Result is a dictionary of (level, initial_position): span. Only entries
        with a span of at least 1 are returned for a MultiIndex.
    """
    if isinstance(index, MultiIndex):
        # one sequence of labels per level; entries that are ``lib.no_default``
        # are treated below as continuations of the previous label
        levels = index.format(sparsify=lib.no_default, adjoin=False)
    else:
        levels = index.format()

    if hidden_elements is None:
        hidden_elements = []

    lengths = {}
    if not isinstance(index, MultiIndex):
        # a flat index has a single level (0) and every visible label spans 1
        for i, value in enumerate(levels):
            if i not in hidden_elements:
                lengths[(0, i)] = 1
        return lengths

    for i, lvl in enumerate(levels):
        visible_row_count = 0  # used to break loop due to display trimming
        for j, row in enumerate(lvl):
            if visible_row_count > max_index:
                break
            if not sparsify:
                # then lengths will always equal 1 since no aggregation.
                if j not in hidden_elements:
                    lengths[(i, j)] = 1
                    visible_row_count += 1
            elif (row is not lib.no_default) and (j not in hidden_elements):
                # this element has not been sparsified so must be the start of section
                last_label = j
                lengths[(i, last_label)] = 1
                visible_row_count += 1
            elif row is not lib.no_default:
                # even if the above is hidden, keep track of it in case length > 1 and
                # later elements are visible
                last_label = j
                lengths[(i, last_label)] = 0
            elif j not in hidden_elements:
                # then element must be part of sparsified section and is visible
                visible_row_count += 1
                if visible_row_count > max_index:
                    break  # do not add a length since the render trim limit reached
                if lengths[(i, last_label)] == 0:
                    # if previous iteration was first-of-section but hidden then offset
                    last_label = j
                    lengths[(i, last_label)] = 1
                else:
                    # else add to previous iteration
                    lengths[(i, last_label)] += 1

    # drop the zero-span placeholders recorded for hidden section starts
    non_zero_lengths = {
        element: length for element, length in lengths.items() if length >= 1
    }

    return non_zero_lengths
  1439. def _is_visible(idx_row, idx_col, lengths) -> bool:
  1440. """
  1441. Index -> {(idx_row, idx_col): bool}).
  1442. """
  1443. return (idx_col, idx_row) in lengths
  1444. def format_table_styles(styles: CSSStyles) -> CSSStyles:
  1445. """
  1446. looks for multiple CSS selectors and separates them:
  1447. [{'selector': 'td, th', 'props': 'a:v;'}]
  1448. ---> [{'selector': 'td', 'props': 'a:v;'},
  1449. {'selector': 'th', 'props': 'a:v;'}]
  1450. """
  1451. return [
  1452. {"selector": selector, "props": css_dict["props"]}
  1453. for css_dict in styles
  1454. for selector in css_dict["selector"].split(",")
  1455. ]
  1456. def _default_formatter(x: Any, precision: int, thousands: bool = False) -> Any:
  1457. """
  1458. Format the display of a value
  1459. Parameters
  1460. ----------
  1461. x : Any
  1462. Input variable to be formatted
  1463. precision : Int
  1464. Floating point precision used if ``x`` is float or complex.
  1465. thousands : bool, default False
  1466. Whether to group digits with thousands separated with ",".
  1467. Returns
  1468. -------
  1469. value : Any
  1470. Matches input type, or string if input is float or complex or int with sep.
  1471. """
  1472. if is_float(x) or is_complex(x):
  1473. return f"{x:,.{precision}f}" if thousands else f"{x:.{precision}f}"
  1474. elif is_integer(x):
  1475. return f"{x:,.0f}" if thousands else f"{x:.0f}"
  1476. return x
  1477. def _wrap_decimal_thousands(
  1478. formatter: Callable, decimal: str, thousands: str | None
  1479. ) -> Callable:
  1480. """
  1481. Takes a string formatting function and wraps logic to deal with thousands and
  1482. decimal parameters, in the case that they are non-standard and that the input
  1483. is a (float, complex, int).
  1484. """
  1485. def wrapper(x):
  1486. if is_float(x) or is_integer(x) or is_complex(x):
  1487. if decimal != "." and thousands is not None and thousands != ",":
  1488. return (
  1489. formatter(x)
  1490. .replace(",", "§_§-") # rare string to avoid "," <-> "." clash.
  1491. .replace(".", decimal)
  1492. .replace("§_§-", thousands)
  1493. )
  1494. elif decimal != "." and (thousands is None or thousands == ","):
  1495. return formatter(x).replace(".", decimal)
  1496. elif decimal == "." and thousands is not None and thousands != ",":
  1497. return formatter(x).replace(",", thousands)
  1498. return formatter(x)
  1499. return wrapper
  1500. def _str_escape(x, escape):
  1501. """if escaping: only use on str, else return input"""
  1502. if isinstance(x, str):
  1503. if escape == "html":
  1504. return escape_html(x)
  1505. elif escape == "latex":
  1506. return _escape_latex(x)
  1507. else:
  1508. raise ValueError(
  1509. f"`escape` only permitted in {{'html', 'latex'}}, got {escape}"
  1510. )
  1511. return x
  1512. def _render_href(x, format):
  1513. """uses regex to detect a common URL pattern and converts to href tag in format."""
  1514. if isinstance(x, str):
  1515. if format == "html":
  1516. href = '<a href="{0}" target="_blank">{0}</a>'
  1517. elif format == "latex":
  1518. href = r"\href{{{0}}}{{{0}}}"
  1519. else:
  1520. raise ValueError("``hyperlinks`` format can only be 'html' or 'latex'")
  1521. pat = r"((http|ftp)s?:\/\/|www.)[\w/\-?=%.:@]+\.[\w/\-&?=%.,':;~!@#$*()\[\]]+"
  1522. return re.sub(pat, lambda m: href.format(m.group(0)), x)
  1523. return x
  1524. def _maybe_wrap_formatter(
  1525. formatter: BaseFormatter | None = None,
  1526. na_rep: str | None = None,
  1527. precision: int | None = None,
  1528. decimal: str = ".",
  1529. thousands: str | None = None,
  1530. escape: str | None = None,
  1531. hyperlinks: str | None = None,
  1532. ) -> Callable:
  1533. """
  1534. Allows formatters to be expressed as str, callable or None, where None returns
  1535. a default formatting function. wraps with na_rep, and precision where they are
  1536. available.
  1537. """
  1538. # Get initial func from input string, input callable, or from default factory
  1539. if isinstance(formatter, str):
  1540. func_0 = lambda x: formatter.format(x)
  1541. elif callable(formatter):
  1542. func_0 = formatter
  1543. elif formatter is None:
  1544. precision = (
  1545. get_option("styler.format.precision") if precision is None else precision
  1546. )
  1547. func_0 = partial(
  1548. _default_formatter, precision=precision, thousands=(thousands is not None)
  1549. )
  1550. else:
  1551. raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}")
  1552. # Replace chars if escaping
  1553. if escape is not None:
  1554. func_1 = lambda x: func_0(_str_escape(x, escape=escape))
  1555. else:
  1556. func_1 = func_0
  1557. # Replace decimals and thousands if non-standard inputs detected
  1558. if decimal != "." or (thousands is not None and thousands != ","):
  1559. func_2 = _wrap_decimal_thousands(func_1, decimal=decimal, thousands=thousands)
  1560. else:
  1561. func_2 = func_1
  1562. # Render links
  1563. if hyperlinks is not None:
  1564. func_3 = lambda x: func_2(_render_href(x, format=hyperlinks))
  1565. else:
  1566. func_3 = func_2
  1567. # Replace missing values if na_rep
  1568. if na_rep is None:
  1569. return func_3
  1570. else:
  1571. return lambda x: na_rep if (isna(x) is True) else func_3(x)
  1572. def non_reducing_slice(slice_: Subset):
  1573. """
  1574. Ensure that a slice doesn't reduce to a Series or Scalar.
  1575. Any user-passed `subset` should have this called on it
  1576. to make sure we're always working with DataFrames.
  1577. """
  1578. # default to column slice, like DataFrame
  1579. # ['A', 'B'] -> IndexSlices[:, ['A', 'B']]
  1580. kinds = (ABCSeries, np.ndarray, Index, list, str)
  1581. if isinstance(slice_, kinds):
  1582. slice_ = IndexSlice[:, slice_]
  1583. def pred(part) -> bool:
  1584. """
  1585. Returns
  1586. -------
  1587. bool
  1588. True if slice does *not* reduce,
  1589. False if `part` is a tuple.
  1590. """
  1591. # true when slice does *not* reduce, False when part is a tuple,
  1592. # i.e. MultiIndex slice
  1593. if isinstance(part, tuple):
  1594. # GH#39421 check for sub-slice:
  1595. return any((isinstance(s, slice) or is_list_like(s)) for s in part)
  1596. else:
  1597. return isinstance(part, slice) or is_list_like(part)
  1598. if not is_list_like(slice_):
  1599. if not isinstance(slice_, slice):
  1600. # a 1-d slice, like df.loc[1]
  1601. slice_ = [[slice_]]
  1602. else:
  1603. # slice(a, b, c)
  1604. slice_ = [slice_] # to tuplize later
  1605. else:
  1606. # error: Item "slice" of "Union[slice, Sequence[Any]]" has no attribute
  1607. # "__iter__" (not iterable) -> is specifically list_like in conditional
  1608. slice_ = [p if pred(p) else [p] for p in slice_] # type: ignore[union-attr]
  1609. return tuple(slice_)
  1610. def maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList:
  1611. """
  1612. Convert css-string to sequence of tuples format if needed.
  1613. 'color:red; border:1px solid black;' -> [('color', 'red'),
  1614. ('border','1px solid red')]
  1615. """
  1616. if isinstance(style, str):
  1617. s = style.split(";")
  1618. try:
  1619. return [
  1620. (x.split(":")[0].strip(), x.split(":")[1].strip())
  1621. for x in s
  1622. if x.strip() != ""
  1623. ]
  1624. except IndexError:
  1625. raise ValueError(
  1626. "Styles supplied as string must follow CSS rule formats, "
  1627. f"for example 'attr: val;'. '{style}' was given."
  1628. )
  1629. return style
  1630. def refactor_levels(
  1631. level: Level | list[Level] | None,
  1632. obj: Index,
  1633. ) -> list[int]:
  1634. """
  1635. Returns a consistent levels arg for use in ``hide_index`` or ``hide_columns``.
  1636. Parameters
  1637. ----------
  1638. level : int, str, list
  1639. Original ``level`` arg supplied to above methods.
  1640. obj:
  1641. Either ``self.index`` or ``self.columns``
  1642. Returns
  1643. -------
  1644. list : refactored arg with a list of levels to hide
  1645. """
  1646. if level is None:
  1647. levels_: list[int] = list(range(obj.nlevels))
  1648. elif isinstance(level, int):
  1649. levels_ = [level]
  1650. elif isinstance(level, str):
  1651. levels_ = [obj._get_level_number(level)]
  1652. elif isinstance(level, list):
  1653. levels_ = [
  1654. obj._get_level_number(lev) if not isinstance(lev, int) else lev
  1655. for lev in level
  1656. ]
  1657. else:
  1658. raise ValueError("`level` must be of type `int`, `str` or list of such")
  1659. return levels_
  1660. class Tooltips:
  1661. """
  1662. An extension to ``Styler`` that allows for and manipulates tooltips on hover
  1663. of ``<td>`` cells in the HTML result.
  1664. Parameters
  1665. ----------
  1666. css_name: str, default "pd-t"
  1667. Name of the CSS class that controls visualisation of tooltips.
  1668. css_props: list-like, default; see Notes
  1669. List of (attr, value) tuples defining properties of the CSS class.
  1670. tooltips: DataFrame, default empty
  1671. DataFrame of strings aligned with underlying Styler data for tooltip
  1672. display.
  1673. Notes
  1674. -----
  1675. The default properties for the tooltip CSS class are:
  1676. - visibility: hidden
  1677. - position: absolute
  1678. - z-index: 1
  1679. - background-color: black
  1680. - color: white
  1681. - transform: translate(-20px, -20px)
  1682. Hidden visibility is a key prerequisite to the hover functionality, and should
  1683. always be included in any manual properties specification.
  1684. """
  1685. def __init__(
  1686. self,
  1687. css_props: CSSProperties = [
  1688. ("visibility", "hidden"),
  1689. ("position", "absolute"),
  1690. ("z-index", 1),
  1691. ("background-color", "black"),
  1692. ("color", "white"),
  1693. ("transform", "translate(-20px, -20px)"),
  1694. ],
  1695. css_name: str = "pd-t",
  1696. tooltips: DataFrame = DataFrame(),
  1697. ) -> None:
  1698. self.class_name = css_name
  1699. self.class_properties = css_props
  1700. self.tt_data = tooltips
  1701. self.table_styles: CSSStyles = []
  1702. @property
  1703. def _class_styles(self):
  1704. """
  1705. Combine the ``_Tooltips`` CSS class name and CSS properties to the format
  1706. required to extend the underlying ``Styler`` `table_styles` to allow
  1707. tooltips to render in HTML.
  1708. Returns
  1709. -------
  1710. styles : List
  1711. """
  1712. return [
  1713. {
  1714. "selector": f".{self.class_name}",
  1715. "props": maybe_convert_css_to_tuples(self.class_properties),
  1716. }
  1717. ]
  1718. def _pseudo_css(self, uuid: str, name: str, row: int, col: int, text: str):
  1719. """
  1720. For every table data-cell that has a valid tooltip (not None, NaN or
  1721. empty string) must create two pseudo CSS entries for the specific
  1722. <td> element id which are added to overall table styles:
  1723. an on hover visibility change and a content change
  1724. dependent upon the user's chosen display string.
  1725. For example:
  1726. [{"selector": "T__row1_col1:hover .pd-t",
  1727. "props": [("visibility", "visible")]},
  1728. {"selector": "T__row1_col1 .pd-t::after",
  1729. "props": [("content", "Some Valid Text String")]}]
  1730. Parameters
  1731. ----------
  1732. uuid: str
  1733. The uuid of the Styler instance
  1734. name: str
  1735. The css-name of the class used for styling tooltips
  1736. row : int
  1737. The row index of the specified tooltip string data
  1738. col : int
  1739. The col index of the specified tooltip string data
  1740. text : str
  1741. The textual content of the tooltip to be displayed in HTML.
  1742. Returns
  1743. -------
  1744. pseudo_css : List
  1745. """
  1746. selector_id = "#T_" + uuid + "_row" + str(row) + "_col" + str(col)
  1747. return [
  1748. {
  1749. "selector": selector_id + f":hover .{name}",
  1750. "props": [("visibility", "visible")],
  1751. },
  1752. {
  1753. "selector": selector_id + f" .{name}::after",
  1754. "props": [("content", f'"{text}"')],
  1755. },
  1756. ]
  1757. def _translate(self, styler: StylerRenderer, d: dict):
  1758. """
  1759. Mutate the render dictionary to allow for tooltips:
  1760. - Add ``<span>`` HTML element to each data cells ``display_value``. Ignores
  1761. headers.
  1762. - Add table level CSS styles to control pseudo classes.
  1763. Parameters
  1764. ----------
  1765. styler_data : DataFrame
  1766. Underlying ``Styler`` DataFrame used for reindexing.
  1767. uuid : str
  1768. The underlying ``Styler`` uuid for CSS id.
  1769. d : dict
  1770. The dictionary prior to final render
  1771. Returns
  1772. -------
  1773. render_dict : Dict
  1774. """
  1775. self.tt_data = self.tt_data.reindex_like(styler.data)
  1776. if self.tt_data.empty:
  1777. return d
  1778. name = self.class_name
  1779. mask = (self.tt_data.isna()) | (self.tt_data.eq("")) # empty string = no ttip
  1780. self.table_styles = [
  1781. style
  1782. for sublist in [
  1783. self._pseudo_css(styler.uuid, name, i, j, str(self.tt_data.iloc[i, j]))
  1784. for i in range(len(self.tt_data.index))
  1785. for j in range(len(self.tt_data.columns))
  1786. if not (
  1787. mask.iloc[i, j]
  1788. or i in styler.hidden_rows
  1789. or j in styler.hidden_columns
  1790. )
  1791. ]
  1792. for style in sublist
  1793. ]
  1794. if self.table_styles:
  1795. # add span class to every cell only if at least 1 non-empty tooltip
  1796. for row in d["body"]:
  1797. for item in row:
  1798. if item["type"] == "td":
  1799. item["display_value"] = (
  1800. str(item["display_value"])
  1801. + f'<span class="{self.class_name}"></span>'
  1802. )
  1803. d["table_styles"].extend(self._class_styles)
  1804. d["table_styles"].extend(self.table_styles)
  1805. return d
  1806. def _parse_latex_table_wrapping(table_styles: CSSStyles, caption: str | None) -> bool:
  1807. """
  1808. Indicate whether LaTeX {tabular} should be wrapped with a {table} environment.
  1809. Parses the `table_styles` and detects any selectors which must be included outside
  1810. of {tabular}, i.e. indicating that wrapping must occur, and therefore return True,
  1811. or if a caption exists and requires similar.
  1812. """
  1813. IGNORED_WRAPPERS = ["toprule", "midrule", "bottomrule", "column_format"]
  1814. # ignored selectors are included with {tabular} so do not need wrapping
  1815. return (
  1816. table_styles is not None
  1817. and any(d["selector"] not in IGNORED_WRAPPERS for d in table_styles)
  1818. ) or caption is not None
  1819. def _parse_latex_table_styles(table_styles: CSSStyles, selector: str) -> str | None:
  1820. """
  1821. Return the first 'props' 'value' from ``tables_styles`` identified by ``selector``.
  1822. Examples
  1823. --------
  1824. >>> table_styles = [{'selector': 'foo', 'props': [('attr','value')]},
  1825. ... {'selector': 'bar', 'props': [('attr', 'overwritten')]},
  1826. ... {'selector': 'bar', 'props': [('a1', 'baz'), ('a2', 'ignore')]}]
  1827. >>> _parse_latex_table_styles(table_styles, selector='bar')
  1828. 'baz'
  1829. Notes
  1830. -----
  1831. The replacement of "§" with ":" is to avoid the CSS problem where ":" has structural
  1832. significance and cannot be used in LaTeX labels, but is often required by them.
  1833. """
  1834. for style in table_styles[::-1]: # in reverse for most recently applied style
  1835. if style["selector"] == selector:
  1836. return str(style["props"][0][1]).replace("§", ":")
  1837. return None
  1838. def _parse_latex_cell_styles(
  1839. latex_styles: CSSList, display_value: str, convert_css: bool = False
  1840. ) -> str:
  1841. r"""
  1842. Mutate the ``display_value`` string including LaTeX commands from ``latex_styles``.
  1843. This method builds a recursive latex chain of commands based on the
  1844. CSSList input, nested around ``display_value``.
  1845. If a CSS style is given as ('<command>', '<options>') this is translated to
  1846. '\<command><options>{display_value}', and this value is treated as the
  1847. display value for the next iteration.
  1848. The most recent style forms the inner component, for example for styles:
  1849. `[('c1', 'o1'), ('c2', 'o2')]` this returns: `\c1o1{\c2o2{display_value}}`
  1850. Sometimes latex commands have to be wrapped with curly braces in different ways:
  1851. We create some parsing flags to identify the different behaviours:
  1852. - `--rwrap` : `\<command><options>{<display_value>}`
  1853. - `--wrap` : `{\<command><options> <display_value>}`
  1854. - `--nowrap` : `\<command><options> <display_value>`
  1855. - `--lwrap` : `{\<command><options>} <display_value>`
  1856. - `--dwrap` : `{\<command><options>}{<display_value>}`
  1857. For example for styles:
  1858. `[('c1', 'o1--wrap'), ('c2', 'o2')]` this returns: `{\c1o1 \c2o2{display_value}}
  1859. """
  1860. if convert_css:
  1861. latex_styles = _parse_latex_css_conversion(latex_styles)
  1862. for command, options in latex_styles[::-1]: # in reverse for most recent style
  1863. formatter = {
  1864. "--wrap": f"{{\\{command}--to_parse {display_value}}}",
  1865. "--nowrap": f"\\{command}--to_parse {display_value}",
  1866. "--lwrap": f"{{\\{command}--to_parse}} {display_value}",
  1867. "--rwrap": f"\\{command}--to_parse{{{display_value}}}",
  1868. "--dwrap": f"{{\\{command}--to_parse}}{{{display_value}}}",
  1869. }
  1870. display_value = f"\\{command}{options} {display_value}"
  1871. for arg in ["--nowrap", "--wrap", "--lwrap", "--rwrap", "--dwrap"]:
  1872. if arg in str(options):
  1873. display_value = formatter[arg].replace(
  1874. "--to_parse", _parse_latex_options_strip(value=options, arg=arg)
  1875. )
  1876. break # only ever one purposeful entry
  1877. return display_value
  1878. def _parse_latex_header_span(
  1879. cell: dict[str, Any],
  1880. multirow_align: str,
  1881. multicol_align: str,
  1882. wrap: bool = False,
  1883. convert_css: bool = False,
  1884. ) -> str:
  1885. r"""
  1886. Refactor the cell `display_value` if a 'colspan' or 'rowspan' attribute is present.
  1887. 'rowspan' and 'colspan' do not occur simultaneouly. If they are detected then
  1888. the `display_value` is altered to a LaTeX `multirow` or `multicol` command
  1889. respectively, with the appropriate cell-span.
  1890. ``wrap`` is used to enclose the `display_value` in braces which is needed for
  1891. column headers using an siunitx package.
  1892. Requires the package {multirow}, whereas multicol support is usually built in
  1893. to the {tabular} environment.
  1894. Examples
  1895. --------
  1896. >>> cell = {'cellstyle': '', 'display_value':'text', 'attributes': 'colspan="3"'}
  1897. >>> _parse_latex_header_span(cell, 't', 'c')
  1898. '\\multicolumn{3}{c}{text}'
  1899. """
  1900. display_val = _parse_latex_cell_styles(
  1901. cell["cellstyle"], cell["display_value"], convert_css
  1902. )
  1903. if "attributes" in cell:
  1904. attrs = cell["attributes"]
  1905. if 'colspan="' in attrs:
  1906. colspan = attrs[attrs.find('colspan="') + 9 :] # len('colspan="') = 9
  1907. colspan = int(colspan[: colspan.find('"')])
  1908. if "naive-l" == multicol_align:
  1909. out = f"{{{display_val}}}" if wrap else f"{display_val}"
  1910. blanks = " & {}" if wrap else " &"
  1911. return out + blanks * (colspan - 1)
  1912. elif "naive-r" == multicol_align:
  1913. out = f"{{{display_val}}}" if wrap else f"{display_val}"
  1914. blanks = "{} & " if wrap else "& "
  1915. return blanks * (colspan - 1) + out
  1916. return f"\\multicolumn{{{colspan}}}{{{multicol_align}}}{{{display_val}}}"
  1917. elif 'rowspan="' in attrs:
  1918. if multirow_align == "naive":
  1919. return display_val
  1920. rowspan = attrs[attrs.find('rowspan="') + 9 :]
  1921. rowspan = int(rowspan[: rowspan.find('"')])
  1922. return f"\\multirow[{multirow_align}]{{{rowspan}}}{{*}}{{{display_val}}}"
  1923. if wrap:
  1924. return f"{{{display_val}}}"
  1925. else:
  1926. return display_val
  1927. def _parse_latex_options_strip(value: str | float, arg: str) -> str:
  1928. """
  1929. Strip a css_value which may have latex wrapping arguments, css comment identifiers,
  1930. and whitespaces, to a valid string for latex options parsing.
  1931. For example: 'red /* --wrap */ ' --> 'red'
  1932. """
  1933. return str(value).replace(arg, "").replace("/*", "").replace("*/", "").strip()
  1934. def _parse_latex_css_conversion(styles: CSSList) -> CSSList:
  1935. """
  1936. Convert CSS (attribute,value) pairs to equivalent LaTeX (command,options) pairs.
  1937. Ignore conversion if tagged with `--latex` option, skipped if no conversion found.
  1938. """
  1939. def font_weight(value, arg):
  1940. if value in ("bold", "bolder"):
  1941. return "bfseries", f"{arg}"
  1942. return None
  1943. def font_style(value, arg):
  1944. if value == "italic":
  1945. return "itshape", f"{arg}"
  1946. if value == "oblique":
  1947. return "slshape", f"{arg}"
  1948. return None
  1949. def color(value, user_arg, command, comm_arg):
  1950. """
  1951. CSS colors have 5 formats to process:
  1952. - 6 digit hex code: "#ff23ee" --> [HTML]{FF23EE}
  1953. - 3 digit hex code: "#f0e" --> [HTML]{FF00EE}
  1954. - rgba: rgba(128, 255, 0, 0.5) --> [rgb]{0.502, 1.000, 0.000}
  1955. - rgb: rgb(128, 255, 0,) --> [rbg]{0.502, 1.000, 0.000}
  1956. - string: red --> {red}
  1957. Additionally rgb or rgba can be expressed in % which is also parsed.
  1958. """
  1959. arg = user_arg if user_arg != "" else comm_arg
  1960. if value[0] == "#" and len(value) == 7: # color is hex code
  1961. return command, f"[HTML]{{{value[1:].upper()}}}{arg}"
  1962. if value[0] == "#" and len(value) == 4: # color is short hex code
  1963. val = f"{value[1].upper()*2}{value[2].upper()*2}{value[3].upper()*2}"
  1964. return command, f"[HTML]{{{val}}}{arg}"
  1965. elif value[:3] == "rgb": # color is rgb or rgba
  1966. r = re.findall("(?<=\\()[0-9\\s%]+(?=,)", value)[0].strip()
  1967. r = float(r[:-1]) / 100 if "%" in r else int(r) / 255
  1968. g = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[0].strip()
  1969. g = float(g[:-1]) / 100 if "%" in g else int(g) / 255
  1970. if value[3] == "a": # color is rgba
  1971. b = re.findall("(?<=,)[0-9\\s%]+(?=,)", value)[1].strip()
  1972. else: # color is rgb
  1973. b = re.findall("(?<=,)[0-9\\s%]+(?=\\))", value)[0].strip()
  1974. b = float(b[:-1]) / 100 if "%" in b else int(b) / 255
  1975. return command, f"[rgb]{{{r:.3f}, {g:.3f}, {b:.3f}}}{arg}"
  1976. else:
  1977. return command, f"{{{value}}}{arg}" # color is likely string-named
  1978. CONVERTED_ATTRIBUTES: dict[str, Callable] = {
  1979. "font-weight": font_weight,
  1980. "background-color": partial(color, command="cellcolor", comm_arg="--lwrap"),
  1981. "color": partial(color, command="color", comm_arg=""),
  1982. "font-style": font_style,
  1983. }
  1984. latex_styles: CSSList = []
  1985. for attribute, value in styles:
  1986. if isinstance(value, str) and "--latex" in value:
  1987. # return the style without conversion but drop '--latex'
  1988. latex_styles.append((attribute, value.replace("--latex", "")))
  1989. if attribute in CONVERTED_ATTRIBUTES:
  1990. arg = ""
  1991. for x in ["--wrap", "--nowrap", "--lwrap", "--dwrap", "--rwrap"]:
  1992. if x in str(value):
  1993. arg, value = x, _parse_latex_options_strip(value, x)
  1994. break
  1995. latex_style = CONVERTED_ATTRIBUTES[attribute](value, arg)
  1996. if latex_style is not None:
  1997. latex_styles.extend([latex_style])
  1998. return latex_styles
  1999. def _escape_latex(s):
  2000. r"""
  2001. Replace the characters ``&``, ``%``, ``$``, ``#``, ``_``, ``{``, ``}``,
  2002. ``~``, ``^``, and ``\`` in the string with LaTeX-safe sequences.
  2003. Use this if you need to display text that might contain such characters in LaTeX.
  2004. Parameters
  2005. ----------
  2006. s : str
  2007. Input to be escaped
  2008. Return
  2009. ------
  2010. str :
  2011. Escaped string
  2012. """
  2013. return (
  2014. s.replace("\\", "ab2§=§8yz") # rare string for final conversion: avoid \\ clash
  2015. .replace("ab2§=§8yz ", "ab2§=§8yz\\space ") # since \backslash gobbles spaces
  2016. .replace("&", "\\&")
  2017. .replace("%", "\\%")
  2018. .replace("$", "\\$")
  2019. .replace("#", "\\#")
  2020. .replace("_", "\\_")
  2021. .replace("{", "\\{")
  2022. .replace("}", "\\}")
  2023. .replace("~ ", "~\\space ") # since \textasciitilde gobbles spaces
  2024. .replace("~", "\\textasciitilde ")
  2025. .replace("^ ", "^\\space ") # since \textasciicircum gobbles spaces
  2026. .replace("^", "\\textasciicircum ")
  2027. .replace("ab2§=§8yz", "\\textbackslash ")
  2028. )