123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633 |
- """
- Module for formatting output data in HTML.
- """
- from __future__ import annotations
- from textwrap import dedent
- from typing import (
- Any,
- Final,
- Hashable,
- Iterable,
- Mapping,
- cast,
- )
- from pandas._config import get_option
- from pandas._libs import lib
- from pandas import (
- MultiIndex,
- option_context,
- )
- from pandas.io.common import is_url
- from pandas.io.formats.format import (
- DataFrameFormatter,
- get_level_lengths,
- )
- from pandas.io.formats.printing import pprint_thing
- class HTMLFormatter:
- """
- Internal class for formatting output data in html.
- This class is intended for shared functionality between
- DataFrame.to_html() and DataFrame._repr_html_().
- Any logic in common with other output formatting methods
- should ideally be inherited from classes in format.py
- and this class responsible for only producing html markup.
- """
- indent_delta: Final = 2
- def __init__(
- self,
- formatter: DataFrameFormatter,
- classes: str | list[str] | tuple[str, ...] | None = None,
- border: int | bool | None = None,
- table_id: str | None = None,
- render_links: bool = False,
- ) -> None:
- self.fmt = formatter
- self.classes = classes
- self.frame = self.fmt.frame
- self.columns = self.fmt.tr_frame.columns
- self.elements: list[str] = []
- self.bold_rows = self.fmt.bold_rows
- self.escape = self.fmt.escape
- self.show_dimensions = self.fmt.show_dimensions
- if border is None or border is True:
- border = cast(int, get_option("display.html.border"))
- elif not border:
- border = None
- self.border = border
- self.table_id = table_id
- self.render_links = render_links
- self.col_space = {
- column: f"{value}px" if isinstance(value, int) else value
- for column, value in self.fmt.col_space.items()
- }
- def to_string(self) -> str:
- lines = self.render()
- if any(isinstance(x, str) for x in lines):
- lines = [str(x) for x in lines]
- return "\n".join(lines)
- def render(self) -> list[str]:
- self._write_table()
- if self.should_show_dimensions:
- by = chr(215) # ×
- self.write(
- f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
- )
- return self.elements
- @property
- def should_show_dimensions(self) -> bool:
- return self.fmt.should_show_dimensions
- @property
- def show_row_idx_names(self) -> bool:
- return self.fmt.show_row_idx_names
- @property
- def show_col_idx_names(self) -> bool:
- return self.fmt.show_col_idx_names
- @property
- def row_levels(self) -> int:
- if self.fmt.index:
- # showing (row) index
- return self.frame.index.nlevels
- elif self.show_col_idx_names:
- # see gh-22579
- # Column misalignment also occurs for
- # a standard index when the columns index is named.
- # If the row index is not displayed a column of
- # blank cells need to be included before the DataFrame values.
- return 1
- # not showing (row) index
- return 0
- def _get_columns_formatted_values(self) -> Iterable:
- return self.columns
- @property
- def is_truncated(self) -> bool:
- return self.fmt.is_truncated
- @property
- def ncols(self) -> int:
- return len(self.fmt.tr_frame.columns)
- def write(self, s: Any, indent: int = 0) -> None:
- rs = pprint_thing(s)
- self.elements.append(" " * indent + rs)
- def write_th(
- self, s: Any, header: bool = False, indent: int = 0, tags: str | None = None
- ) -> None:
- """
- Method for writing a formatted <th> cell.
- If col_space is set on the formatter then that is used for
- the value of min-width.
- Parameters
- ----------
- s : object
- The data to be written inside the cell.
- header : bool, default False
- Set to True if the <th> is for use inside <thead>. This will
- cause min-width to be set if there is one.
- indent : int, default 0
- The indentation level of the cell.
- tags : str, default None
- Tags to include in the cell.
- Returns
- -------
- A written <th> cell.
- """
- col_space = self.col_space.get(s, None)
- if header and col_space is not None:
- tags = tags or ""
- tags += f'style="min-width: {col_space};"'
- self._write_cell(s, kind="th", indent=indent, tags=tags)
- def write_td(self, s: Any, indent: int = 0, tags: str | None = None) -> None:
- self._write_cell(s, kind="td", indent=indent, tags=tags)
- def _write_cell(
- self, s: Any, kind: str = "td", indent: int = 0, tags: str | None = None
- ) -> None:
- if tags is not None:
- start_tag = f"<{kind} {tags}>"
- else:
- start_tag = f"<{kind}>"
- if self.escape:
- # escape & first to prevent double escaping of &
- esc = {"&": r"&", "<": r"<", ">": r">"}
- else:
- esc = {}
- rs = pprint_thing(s, escape_chars=esc).strip()
- if self.render_links and is_url(rs):
- rs_unescaped = pprint_thing(s, escape_chars={}).strip()
- start_tag += f'<a href="{rs_unescaped}" target="_blank">'
- end_a = "</a>"
- else:
- end_a = ""
- self.write(f"{start_tag}{rs}{end_a}</{kind}>", indent)
- def write_tr(
- self,
- line: Iterable,
- indent: int = 0,
- indent_delta: int = 0,
- header: bool = False,
- align: str | None = None,
- tags: dict[int, str] | None = None,
- nindex_levels: int = 0,
- ) -> None:
- if tags is None:
- tags = {}
- if align is None:
- self.write("<tr>", indent)
- else:
- self.write(f'<tr style="text-align: {align};">', indent)
- indent += indent_delta
- for i, s in enumerate(line):
- val_tag = tags.get(i, None)
- if header or (self.bold_rows and i < nindex_levels):
- self.write_th(s, indent=indent, header=header, tags=val_tag)
- else:
- self.write_td(s, indent, tags=val_tag)
- indent -= indent_delta
- self.write("</tr>", indent)
- def _write_table(self, indent: int = 0) -> None:
- _classes = ["dataframe"] # Default class.
- use_mathjax = get_option("display.html.use_mathjax")
- if not use_mathjax:
- _classes.append("tex2jax_ignore")
- if self.classes is not None:
- if isinstance(self.classes, str):
- self.classes = self.classes.split()
- if not isinstance(self.classes, (list, tuple)):
- raise TypeError(
- "classes must be a string, list, "
- f"or tuple, not {type(self.classes)}"
- )
- _classes.extend(self.classes)
- if self.table_id is None:
- id_section = ""
- else:
- id_section = f' id="{self.table_id}"'
- if self.border is None:
- border_attr = ""
- else:
- border_attr = f' border="{self.border}"'
- self.write(
- f'<table{border_attr} class="{" ".join(_classes)}"{id_section}>',
- indent,
- )
- if self.fmt.header or self.show_row_idx_names:
- self._write_header(indent + self.indent_delta)
- self._write_body(indent + self.indent_delta)
- self.write("</table>", indent)
- def _write_col_header(self, indent: int) -> None:
- row: list[Hashable]
- is_truncated_horizontally = self.fmt.is_truncated_horizontally
- if isinstance(self.columns, MultiIndex):
- template = 'colspan="{span:d}" halign="left"'
- sentinel: lib.NoDefault | bool
- if self.fmt.sparsify:
- # GH3547
- sentinel = lib.no_default
- else:
- sentinel = False
- levels = self.columns.format(sparsify=sentinel, adjoin=False, names=False)
- level_lengths = get_level_lengths(levels, sentinel)
- inner_lvl = len(level_lengths) - 1
- for lnum, (records, values) in enumerate(zip(level_lengths, levels)):
- if is_truncated_horizontally:
- # modify the header lines
- ins_col = self.fmt.tr_col_num
- if self.fmt.sparsify:
- recs_new = {}
- # Increment tags after ... col.
- for tag, span in list(records.items()):
- if tag >= ins_col:
- recs_new[tag + 1] = span
- elif tag + span > ins_col:
- recs_new[tag] = span + 1
- if lnum == inner_lvl:
- values = (
- values[:ins_col] + ("...",) + values[ins_col:]
- )
- else:
- # sparse col headers do not receive a ...
- values = (
- values[:ins_col]
- + (values[ins_col - 1],)
- + values[ins_col:]
- )
- else:
- recs_new[tag] = span
- # if ins_col lies between tags, all col headers
- # get ...
- if tag + span == ins_col:
- recs_new[ins_col] = 1
- values = values[:ins_col] + ("...",) + values[ins_col:]
- records = recs_new
- inner_lvl = len(level_lengths) - 1
- if lnum == inner_lvl:
- records[ins_col] = 1
- else:
- recs_new = {}
- for tag, span in list(records.items()):
- if tag >= ins_col:
- recs_new[tag + 1] = span
- else:
- recs_new[tag] = span
- recs_new[ins_col] = 1
- records = recs_new
- values = values[:ins_col] + ["..."] + values[ins_col:]
- # see gh-22579
- # Column Offset Bug with to_html(index=False) with
- # MultiIndex Columns and Index.
- # Initially fill row with blank cells before column names.
- # TODO: Refactor to remove code duplication with code
- # block below for standard columns index.
- row = [""] * (self.row_levels - 1)
- if self.fmt.index or self.show_col_idx_names:
- # see gh-22747
- # If to_html(index_names=False) do not show columns
- # index names.
- # TODO: Refactor to use _get_column_name_list from
- # DataFrameFormatter class and create a
- # _get_formatted_column_labels function for code
- # parity with DataFrameFormatter class.
- if self.fmt.show_index_names:
- name = self.columns.names[lnum]
- row.append(pprint_thing(name or ""))
- else:
- row.append("")
- tags = {}
- j = len(row)
- for i, v in enumerate(values):
- if i in records:
- if records[i] > 1:
- tags[j] = template.format(span=records[i])
- else:
- continue
- j += 1
- row.append(v)
- self.write_tr(row, indent, self.indent_delta, tags=tags, header=True)
- else:
- # see gh-22579
- # Column misalignment also occurs for
- # a standard index when the columns index is named.
- # Initially fill row with blank cells before column names.
- # TODO: Refactor to remove code duplication with code block
- # above for columns MultiIndex.
- row = [""] * (self.row_levels - 1)
- if self.fmt.index or self.show_col_idx_names:
- # see gh-22747
- # If to_html(index_names=False) do not show columns
- # index names.
- # TODO: Refactor to use _get_column_name_list from
- # DataFrameFormatter class.
- if self.fmt.show_index_names:
- row.append(self.columns.name or "")
- else:
- row.append("")
- row.extend(self._get_columns_formatted_values())
- align = self.fmt.justify
- if is_truncated_horizontally:
- ins_col = self.row_levels + self.fmt.tr_col_num
- row.insert(ins_col, "...")
- self.write_tr(row, indent, self.indent_delta, header=True, align=align)
- def _write_row_header(self, indent: int) -> None:
- is_truncated_horizontally = self.fmt.is_truncated_horizontally
- row = [x if x is not None else "" for x in self.frame.index.names] + [""] * (
- self.ncols + (1 if is_truncated_horizontally else 0)
- )
- self.write_tr(row, indent, self.indent_delta, header=True)
- def _write_header(self, indent: int) -> None:
- self.write("<thead>", indent)
- if self.fmt.header:
- self._write_col_header(indent + self.indent_delta)
- if self.show_row_idx_names:
- self._write_row_header(indent + self.indent_delta)
- self.write("</thead>", indent)
- def _get_formatted_values(self) -> dict[int, list[str]]:
- with option_context("display.max_colwidth", None):
- fmt_values = {i: self.fmt.format_col(i) for i in range(self.ncols)}
- return fmt_values
- def _write_body(self, indent: int) -> None:
- self.write("<tbody>", indent)
- fmt_values = self._get_formatted_values()
- # write values
- if self.fmt.index and isinstance(self.frame.index, MultiIndex):
- self._write_hierarchical_rows(fmt_values, indent + self.indent_delta)
- else:
- self._write_regular_rows(fmt_values, indent + self.indent_delta)
- self.write("</tbody>", indent)
- def _write_regular_rows(
- self, fmt_values: Mapping[int, list[str]], indent: int
- ) -> None:
- is_truncated_horizontally = self.fmt.is_truncated_horizontally
- is_truncated_vertically = self.fmt.is_truncated_vertically
- nrows = len(self.fmt.tr_frame)
- if self.fmt.index:
- fmt = self.fmt._get_formatter("__index__")
- if fmt is not None:
- index_values = self.fmt.tr_frame.index.map(fmt)
- else:
- index_values = self.fmt.tr_frame.index.format()
- row: list[str] = []
- for i in range(nrows):
- if is_truncated_vertically and i == (self.fmt.tr_row_num):
- str_sep_row = ["..."] * len(row)
- self.write_tr(
- str_sep_row,
- indent,
- self.indent_delta,
- tags=None,
- nindex_levels=self.row_levels,
- )
- row = []
- if self.fmt.index:
- row.append(index_values[i])
- # see gh-22579
- # Column misalignment also occurs for
- # a standard index when the columns index is named.
- # Add blank cell before data cells.
- elif self.show_col_idx_names:
- row.append("")
- row.extend(fmt_values[j][i] for j in range(self.ncols))
- if is_truncated_horizontally:
- dot_col_ix = self.fmt.tr_col_num + self.row_levels
- row.insert(dot_col_ix, "...")
- self.write_tr(
- row, indent, self.indent_delta, tags=None, nindex_levels=self.row_levels
- )
- def _write_hierarchical_rows(
- self, fmt_values: Mapping[int, list[str]], indent: int
- ) -> None:
- template = 'rowspan="{span}" valign="top"'
- is_truncated_horizontally = self.fmt.is_truncated_horizontally
- is_truncated_vertically = self.fmt.is_truncated_vertically
- frame = self.fmt.tr_frame
- nrows = len(frame)
- assert isinstance(frame.index, MultiIndex)
- idx_values = frame.index.format(sparsify=False, adjoin=False, names=False)
- idx_values = list(zip(*idx_values))
- if self.fmt.sparsify:
- # GH3547
- sentinel = lib.no_default
- levels = frame.index.format(sparsify=sentinel, adjoin=False, names=False)
- level_lengths = get_level_lengths(levels, sentinel)
- inner_lvl = len(level_lengths) - 1
- if is_truncated_vertically:
- # Insert ... row and adjust idx_values and
- # level_lengths to take this into account.
- ins_row = self.fmt.tr_row_num
- inserted = False
- for lnum, records in enumerate(level_lengths):
- rec_new = {}
- for tag, span in list(records.items()):
- if tag >= ins_row:
- rec_new[tag + 1] = span
- elif tag + span > ins_row:
- rec_new[tag] = span + 1
- # GH 14882 - Make sure insertion done once
- if not inserted:
- dot_row = list(idx_values[ins_row - 1])
- dot_row[-1] = "..."
- idx_values.insert(ins_row, tuple(dot_row))
- inserted = True
- else:
- dot_row = list(idx_values[ins_row])
- dot_row[inner_lvl - lnum] = "..."
- idx_values[ins_row] = tuple(dot_row)
- else:
- rec_new[tag] = span
- # If ins_row lies between tags, all cols idx cols
- # receive ...
- if tag + span == ins_row:
- rec_new[ins_row] = 1
- if lnum == 0:
- idx_values.insert(
- ins_row, tuple(["..."] * len(level_lengths))
- )
- # GH 14882 - Place ... in correct level
- elif inserted:
- dot_row = list(idx_values[ins_row])
- dot_row[inner_lvl - lnum] = "..."
- idx_values[ins_row] = tuple(dot_row)
- level_lengths[lnum] = rec_new
- level_lengths[inner_lvl][ins_row] = 1
- for ix_col in fmt_values:
- fmt_values[ix_col].insert(ins_row, "...")
- nrows += 1
- for i in range(nrows):
- row = []
- tags = {}
- sparse_offset = 0
- j = 0
- for records, v in zip(level_lengths, idx_values[i]):
- if i in records:
- if records[i] > 1:
- tags[j] = template.format(span=records[i])
- else:
- sparse_offset += 1
- continue
- j += 1
- row.append(v)
- row.extend(fmt_values[j][i] for j in range(self.ncols))
- if is_truncated_horizontally:
- row.insert(
- self.row_levels - sparse_offset + self.fmt.tr_col_num, "..."
- )
- self.write_tr(
- row,
- indent,
- self.indent_delta,
- tags=tags,
- nindex_levels=len(levels) - sparse_offset,
- )
- else:
- row = []
- for i in range(len(frame)):
- if is_truncated_vertically and i == (self.fmt.tr_row_num):
- str_sep_row = ["..."] * len(row)
- self.write_tr(
- str_sep_row,
- indent,
- self.indent_delta,
- tags=None,
- nindex_levels=self.row_levels,
- )
- idx_values = list(
- zip(*frame.index.format(sparsify=False, adjoin=False, names=False))
- )
- row = []
- row.extend(idx_values[i])
- row.extend(fmt_values[j][i] for j in range(self.ncols))
- if is_truncated_horizontally:
- row.insert(self.row_levels + self.fmt.tr_col_num, "...")
- self.write_tr(
- row,
- indent,
- self.indent_delta,
- tags=None,
- nindex_levels=frame.index.nlevels,
- )
- class NotebookFormatter(HTMLFormatter):
- """
- Internal class for formatting output data in html for display in Jupyter
- Notebooks. This class is intended for functionality specific to
- DataFrame._repr_html_() and DataFrame.to_html(notebook=True)
- """
- def _get_formatted_values(self) -> dict[int, list[str]]:
- return {i: self.fmt.format_col(i) for i in range(self.ncols)}
- def _get_columns_formatted_values(self) -> list[str]:
- return self.columns.format()
- def write_style(self) -> None:
- # We use the "scoped" attribute here so that the desired
- # style properties for the data frame are not then applied
- # throughout the entire notebook.
- template_first = """\
- <style scoped>"""
- template_last = """\
- </style>"""
- template_select = """\
- .dataframe %s {
- %s: %s;
- }"""
- element_props = [
- ("tbody tr th:only-of-type", "vertical-align", "middle"),
- ("tbody tr th", "vertical-align", "top"),
- ]
- if isinstance(self.columns, MultiIndex):
- element_props.append(("thead tr th", "text-align", "left"))
- if self.show_row_idx_names:
- element_props.append(
- ("thead tr:last-of-type th", "text-align", "right")
- )
- else:
- element_props.append(("thead th", "text-align", "right"))
- template_mid = "\n\n".join(map(lambda t: template_select % t, element_props))
- template = dedent("\n".join((template_first, template_mid, template_last)))
- self.write(template)
- def render(self) -> list[str]:
- self.write("<div>")
- self.write_style()
- super().render()
- self.write("</div>")
- return self.elements
|