12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831 |
- """
- Module for formatting output data in Latex.
- """
- from __future__ import annotations
- from abc import (
- ABC,
- abstractmethod,
- )
- from typing import (
- TYPE_CHECKING,
- Iterator,
- Sequence,
- )
- import numpy as np
- from pandas.core.dtypes.generic import ABCMultiIndex
- if TYPE_CHECKING:
- from pandas.io.formats.format import DataFrameFormatter
- def _split_into_full_short_caption(
- caption: str | tuple[str, str] | None
- ) -> tuple[str, str]:
- """Extract full and short captions from caption string/tuple.
- Parameters
- ----------
- caption : str or tuple, optional
- Either table caption string or tuple (full_caption, short_caption).
- If string is provided, then it is treated as table full caption,
- while short_caption is considered an empty string.
- Returns
- -------
- full_caption, short_caption : tuple
- Tuple of full_caption, short_caption strings.
- """
- if caption:
- if isinstance(caption, str):
- full_caption = caption
- short_caption = ""
- else:
- try:
- full_caption, short_caption = caption
- except ValueError as err:
- msg = "caption must be either a string or a tuple of two strings"
- raise ValueError(msg) from err
- else:
- full_caption = ""
- short_caption = ""
- return full_caption, short_caption
- class RowStringConverter:
- r"""Converter for dataframe rows into LaTeX strings.
- Parameters
- ----------
- formatter : `DataFrameFormatter`
- Instance of `DataFrameFormatter`.
- multicolumn: bool, optional
- Whether to use \multicolumn macro.
- multicolumn_format: str, optional
- Multicolumn format.
- multirow: bool, optional
- Whether to use \multirow macro.
- """
- def __init__(
- self,
- formatter: DataFrameFormatter,
- multicolumn: bool = False,
- multicolumn_format: str | None = None,
- multirow: bool = False,
- ) -> None:
- self.fmt = formatter
- self.frame = self.fmt.frame
- self.multicolumn = multicolumn
- self.multicolumn_format = multicolumn_format
- self.multirow = multirow
- self.clinebuf: list[list[int]] = []
- self.strcols = self._get_strcols()
- self.strrows = list(zip(*self.strcols))
- def get_strrow(self, row_num: int) -> str:
- """Get string representation of the row."""
- row = self.strrows[row_num]
- is_multicol = (
- row_num < self.column_levels and self.fmt.header and self.multicolumn
- )
- is_multirow = (
- row_num >= self.header_levels
- and self.fmt.index
- and self.multirow
- and self.index_levels > 1
- )
- is_cline_maybe_required = is_multirow and row_num < len(self.strrows) - 1
- crow = self._preprocess_row(row)
- if is_multicol:
- crow = self._format_multicolumn(crow)
- if is_multirow:
- crow = self._format_multirow(crow, row_num)
- lst = []
- lst.append(" & ".join(crow))
- lst.append(" \\\\")
- if is_cline_maybe_required:
- cline = self._compose_cline(row_num, len(self.strcols))
- lst.append(cline)
- return "".join(lst)
- @property
- def _header_row_num(self) -> int:
- """Number of rows in header."""
- return self.header_levels if self.fmt.header else 0
- @property
- def index_levels(self) -> int:
- """Integer number of levels in index."""
- return self.frame.index.nlevels
- @property
- def column_levels(self) -> int:
- return self.frame.columns.nlevels
- @property
- def header_levels(self) -> int:
- nlevels = self.column_levels
- if self.fmt.has_index_names and self.fmt.show_index_names:
- nlevels += 1
- return nlevels
- def _get_strcols(self) -> list[list[str]]:
- """String representation of the columns."""
- if self.fmt.frame.empty:
- strcols = [[self._empty_info_line]]
- else:
- strcols = self.fmt.get_strcols()
- # reestablish the MultiIndex that has been joined by get_strcols()
- if self.fmt.index and isinstance(self.frame.index, ABCMultiIndex):
- out = self.frame.index.format(
- adjoin=False,
- sparsify=self.fmt.sparsify,
- names=self.fmt.has_index_names,
- na_rep=self.fmt.na_rep,
- )
- # index.format will sparsify repeated entries with empty strings
- # so pad these with some empty space
- def pad_empties(x):
- for pad in reversed(x):
- if pad:
- return [x[0]] + [i if i else " " * len(pad) for i in x[1:]]
- gen = (pad_empties(i) for i in out)
- # Add empty spaces for each column level
- clevels = self.frame.columns.nlevels
- out = [[" " * len(i[-1])] * clevels + i for i in gen]
- # Add the column names to the last index column
- cnames = self.frame.columns.names
- if any(cnames):
- new_names = [i if i else "{}" for i in cnames]
- out[self.frame.index.nlevels - 1][:clevels] = new_names
- # Get rid of old multiindex column and add new ones
- strcols = out + strcols[1:]
- return strcols
- @property
- def _empty_info_line(self) -> str:
- return (
- f"Empty {type(self.frame).__name__}\n"
- f"Columns: {self.frame.columns}\n"
- f"Index: {self.frame.index}"
- )
- def _preprocess_row(self, row: Sequence[str]) -> list[str]:
- """Preprocess elements of the row."""
- if self.fmt.escape:
- crow = _escape_symbols(row)
- else:
- crow = [x if x else "{}" for x in row]
- if self.fmt.bold_rows and self.fmt.index:
- crow = _convert_to_bold(crow, self.index_levels)
- return crow
- def _format_multicolumn(self, row: list[str]) -> list[str]:
- r"""
- Combine columns belonging to a group to a single multicolumn entry
- according to self.multicolumn_format
- e.g.:
- a & & & b & c &
- will become
- \multicolumn{3}{l}{a} & b & \multicolumn{2}{l}{c}
- """
- row2 = row[: self.index_levels]
- ncol = 1
- coltext = ""
- def append_col() -> None:
- # write multicolumn if needed
- if ncol > 1:
- row2.append(
- f"\\multicolumn{{{ncol:d}}}{{{self.multicolumn_format}}}"
- f"{{{coltext.strip()}}}"
- )
- # don't modify where not needed
- else:
- row2.append(coltext)
- for c in row[self.index_levels :]:
- # if next col has text, write the previous
- if c.strip():
- if coltext:
- append_col()
- coltext = c
- ncol = 1
- # if not, add it to the previous multicolumn
- else:
- ncol += 1
- # write last column name
- if coltext:
- append_col()
- return row2
- def _format_multirow(self, row: list[str], i: int) -> list[str]:
- r"""
- Check following rows, whether row should be a multirow
- e.g.: becomes:
- a & 0 & \multirow{2}{*}{a} & 0 &
- & 1 & & 1 &
- b & 0 & \cline{1-2}
- b & 0 &
- """
- for j in range(self.index_levels):
- if row[j].strip():
- nrow = 1
- for r in self.strrows[i + 1 :]:
- if not r[j].strip():
- nrow += 1
- else:
- break
- if nrow > 1:
- # overwrite non-multirow entry
- row[j] = f"\\multirow{{{nrow:d}}}{{*}}{{{row[j].strip()}}}"
- # save when to end the current block with \cline
- self.clinebuf.append([i + nrow - 1, j + 1])
- return row
- def _compose_cline(self, i: int, icol: int) -> str:
- """
- Create clines after multirow-blocks are finished.
- """
- lst = []
- for cl in self.clinebuf:
- if cl[0] == i:
- lst.append(f"\n\\cline{{{cl[1]:d}-{icol:d}}}")
- # remove entries that have been written to buffer
- self.clinebuf = [x for x in self.clinebuf if x[0] != i]
- return "".join(lst)
class RowStringIterator(RowStringConverter, ABC):
    """Abstract iterator over rows of the header or the body of the table.

    ``ABC`` is a base class so that ``@abstractmethod`` is enforced:
    subclasses must implement ``__iter__`` to be instantiable.
    """

    @abstractmethod
    def __iter__(self) -> Iterator[str]:
        """Iterate over LaTeX string representations of rows."""
class RowHeaderIterator(RowStringIterator):
    """Iterator for the table header rows."""

    def __iter__(self) -> Iterator[str]:
        """Yield the LaTeX line of every row that belongs to the header."""
        # Header rows are the first `_header_row_num` rows that exist.
        n_header_rows = min(self._header_row_num, len(self.strrows))
        for row_num in range(n_header_rows):
            yield self.get_strrow(row_num)
class RowBodyIterator(RowStringIterator):
    """Iterator for the table body rows."""

    def __iter__(self) -> Iterator[str]:
        """Yield the LaTeX line of every row that follows the header."""
        # Body rows start right after the header rows.
        for row_num in range(self._header_row_num, len(self.strrows)):
            yield self.get_strrow(row_num)
class TableBuilderAbstract(ABC):
    """
    Abstract table builder producing string representation of LaTeX table.

    Parameters
    ----------
    formatter : `DataFrameFormatter`
        Instance of `DataFrameFormatter`.
    column_format: str, optional
        Column format, for example, 'rcl' for three columns.
    multicolumn: bool, optional
        Use multicolumn to enhance MultiIndex columns.
    multicolumn_format: str, optional
        The alignment for multicolumns, similar to column_format.
    multirow: bool, optional
        Use multirow to enhance MultiIndex rows.
    caption: str, optional
        Table caption.
    short_caption: str, optional
        Table short caption.
    label: str, optional
        LaTeX label.
    position: str, optional
        Float placement specifier, for example, 'htb'.
    """

    def __init__(
        self,
        formatter: DataFrameFormatter,
        column_format: str | None = None,
        multicolumn: bool = False,
        multicolumn_format: str | None = None,
        multirow: bool = False,
        caption: str | None = None,
        short_caption: str | None = None,
        label: str | None = None,
        position: str | None = None,
    ) -> None:
        self.fmt = formatter
        self.column_format = column_format
        self.multicolumn = multicolumn
        self.multicolumn_format = multicolumn_format
        self.multirow = multirow
        self.caption = caption
        self.short_caption = short_caption
        self.label = label
        self.position = position

    def get_result(self) -> str:
        """Assemble the non-empty table parts, one per line, newline-terminated."""
        parts = (
            self.env_begin,
            self.top_separator,
            self.header,
            self.middle_separator,
            self.env_body,
            self.bottom_separator,
            self.env_end,
        )
        # Empty parts are skipped so they do not produce blank lines.
        return "\n".join(filter(None, parts)) + "\n"

    @property
    @abstractmethod
    def env_begin(self) -> str:
        """Beginning of the environment."""

    @property
    @abstractmethod
    def top_separator(self) -> str:
        """Top level separator."""

    @property
    @abstractmethod
    def header(self) -> str:
        """Header lines."""

    @property
    @abstractmethod
    def middle_separator(self) -> str:
        """Middle level separator."""

    @property
    @abstractmethod
    def env_body(self) -> str:
        """Environment body."""

    @property
    @abstractmethod
    def bottom_separator(self) -> str:
        """Bottom level separator."""

    @property
    @abstractmethod
    def env_end(self) -> str:
        """End of the environment."""
class GenericTableBuilder(TableBuilderAbstract):
    """Table builder producing string representation of LaTeX table."""

    @property
    def header(self) -> str:
        """Header rows joined by newlines."""
        return "\n".join(self._create_row_iterator(over="header"))

    @property
    def top_separator(self) -> str:
        """Rule placed above the header."""
        return r"\toprule"

    @property
    def middle_separator(self) -> str:
        """Rule between header and body, or empty when not required."""
        if self._is_separator_required():
            return r"\midrule"
        return ""

    @property
    def env_body(self) -> str:
        """Body rows joined by newlines."""
        return "\n".join(self._create_row_iterator(over="body"))

    def _is_separator_required(self) -> bool:
        """Whether both the header and the body are non-empty."""
        return bool(self.header and self.env_body)

    @property
    def _position_macro(self) -> str:
        r"""Position macro, extracted from self.position, like [h]."""
        if not self.position:
            return ""
        return f"[{self.position}]"

    @property
    def _caption_macro(self) -> str:
        r"""Caption macro, extracted from self.caption.

        With short caption:
            \caption[short_caption]{caption_string}.

        Without short caption:
            \caption{caption_string}.
        """
        if not self.caption:
            return ""
        short_part = f"[{self.short_caption}]" if self.short_caption else ""
        return f"\\caption{short_part}{{{self.caption}}}"

    @property
    def _label_macro(self) -> str:
        r"""Label macro, extracted from self.label, like \label{ref}."""
        if not self.label:
            return ""
        return f"\\label{{{self.label}}}"

    def _create_row_iterator(self, over: str) -> RowStringIterator:
        """Create iterator over header or body of the table.

        Parameters
        ----------
        over : {'body', 'header'}
            Over what to iterate.

        Returns
        -------
        RowStringIterator
            Iterator over body or header.
        """
        iterator_cls = self._select_iterator(over)
        return iterator_cls(
            formatter=self.fmt,
            multicolumn=self.multicolumn,
            multicolumn_format=self.multicolumn_format,
            multirow=self.multirow,
        )

    def _select_iterator(self, over: str) -> type[RowStringIterator]:
        """Select proper iterator over table rows.

        Raises
        ------
        ValueError
            If ``over`` is neither 'header' nor 'body'.
        """
        if over == "header":
            return RowHeaderIterator
        if over == "body":
            return RowBodyIterator
        raise ValueError(
            f"'over' must be either 'header' or 'body', but {over} was provided"
        )
class LongTableBuilder(GenericTableBuilder):
    """Concrete table builder for longtable.

    >>> from pandas.io.formats import format as fmt
    >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
    >>> formatter = fmt.DataFrameFormatter(df)
    >>> builder = LongTableBuilder(formatter, caption='a long table',
    ...                            label='tab:long', column_format='lrl')
    >>> table = builder.get_result()
    >>> print(table)
    \\begin{longtable}{lrl}
    \\caption{a long table}
    \\label{tab:long}\\\\
    \\toprule
    {} &  a &   b \\\\
    \\midrule
    \\endfirsthead
    \\caption[]{a long table} \\\\
    \\toprule
    {} &  a &   b \\\\
    \\midrule
    \\endhead
    \\midrule
    \\multicolumn{3}{r}{{Continued on next page}} \\\\
    \\midrule
    \\endfoot
    <BLANKLINE>
    \\bottomrule
    \\endlastfoot
    0 &  1 &  b1 \\\\
    1 &  2 &  b2 \\\\
    \\end{longtable}
    <BLANKLINE>
    """

    @property
    def env_begin(self) -> str:
        """Opening line of the longtable, followed by caption and label."""
        opening = (
            f"\\begin{{longtable}}{self._position_macro}{{{self.column_format}}}"
        )
        return "\n".join(filter(None, (opening, self._caption_and_label())))

    def _caption_and_label(self) -> str:
        """Caption and label macros ended by a double backslash, or empty."""
        if not (self.caption or self.label):
            return ""
        macros = (self._caption_macro, self._label_macro)
        return "\n".join(filter(None, macros)) + "\\\\"

    @property
    def middle_separator(self) -> str:
        """Head and foot definitions placed between header and body."""
        n_columns = len(self._create_row_iterator(over="header").strcols)
        if not self._is_separator_required():
            return ""

        # the content between \endfirsthead and \endhead commands
        # mitigates repeated List of Tables entries in the final LaTeX
        # document when dealing with longtable environments; GH #34360
        repeated_caption = ""
        if self.caption:
            repeated_caption = f"\\caption[]{{{self.caption}}} \\\\"
        continued = (
            f"\\multicolumn{{{n_columns}}}{{r}}" + "{{Continued on next page}} \\\\"
        )
        lines = [
            "\\midrule",
            "\\endfirsthead",
            repeated_caption,
            self.top_separator,
            self.header,
            "\\midrule",
            "\\endhead",
            "\\midrule",
            continued,
            "\\midrule",
            "\\endfoot\n",
            "\\bottomrule",
            "\\endlastfoot",
        ]
        # Empty entries are kept on purpose; they are not filtered out here.
        return "\n".join(lines)

    @property
    def bottom_separator(self) -> str:
        """Always empty for longtable."""
        return ""

    @property
    def env_end(self) -> str:
        """Closing line of the longtable environment."""
        return r"\end{longtable}"
class RegularTableBuilder(GenericTableBuilder):
    """Concrete table builder for regular table.

    >>> from pandas.io.formats import format as fmt
    >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
    >>> formatter = fmt.DataFrameFormatter(df)
    >>> builder = RegularTableBuilder(formatter, caption='caption', label='lab',
    ...                               column_format='lrc')
    >>> table = builder.get_result()
    >>> print(table)
    \\begin{table}
    \\centering
    \\caption{caption}
    \\label{lab}
    \\begin{tabular}{lrc}
    \\toprule
    {} &  a &   b \\\\
    \\midrule
    0 &  1 &  b1 \\\\
    1 &  2 &  b2 \\\\
    \\bottomrule
    \\end{tabular}
    \\end{table}
    <BLANKLINE>
    """

    @property
    def env_begin(self) -> str:
        """Opening lines of the table and the nested tabular environment."""
        lines = (
            f"\\begin{{table}}{self._position_macro}",
            r"\centering",
            self._caption_macro,
            self._label_macro,
            f"\\begin{{tabular}}{{{self.column_format}}}",
        )
        # Caption and label lines are left out when they are empty.
        return "\n".join(filter(None, lines))

    @property
    def bottom_separator(self) -> str:
        """Rule placed below the body."""
        return r"\bottomrule"

    @property
    def env_end(self) -> str:
        """Closing lines of the tabular and table environments."""
        return "\\end{tabular}\n\\end{table}"
class TabularBuilder(GenericTableBuilder):
    """Concrete table builder for tabular environment.

    >>> from pandas.io.formats import format as fmt
    >>> df = pd.DataFrame({"a": [1, 2], "b": ["b1", "b2"]})
    >>> formatter = fmt.DataFrameFormatter(df)
    >>> builder = TabularBuilder(formatter, column_format='lrc')
    >>> table = builder.get_result()
    >>> print(table)
    \\begin{tabular}{lrc}
    \\toprule
    {} &  a &   b \\\\
    \\midrule
    0 &  1 &  b1 \\\\
    1 &  2 &  b2 \\\\
    \\bottomrule
    \\end{tabular}
    <BLANKLINE>
    """

    @property
    def env_begin(self) -> str:
        """Opening line of the tabular environment with the column format."""
        return rf"\begin{{tabular}}{{{self.column_format}}}"

    @property
    def bottom_separator(self) -> str:
        """Rule placed below the body."""
        return r"\bottomrule"

    @property
    def env_end(self) -> str:
        """Closing line of the tabular environment."""
        return r"\end{tabular}"
class LatexFormatter:
    r"""
    Used to render a DataFrame to a LaTeX tabular/longtable environment output.

    Parameters
    ----------
    formatter : `DataFrameFormatter`
    longtable : bool, default False
        Use longtable environment.
    column_format : str, default None
        The columns format as specified in `LaTeX table format
        <https://en.wikibooks.org/wiki/LaTeX/Tables>`__ e.g 'rcl' for 3 columns
    multicolumn : bool, default False
        Use \multicolumn to enhance MultiIndex columns.
    multicolumn_format : str, default 'l'
        The alignment for multicolumns, similar to `column_format`
    multirow : bool, default False
        Use \multirow to enhance MultiIndex rows.
    caption : str or tuple, optional
        Tuple (full_caption, short_caption),
        which results in \caption[short_caption]{full_caption};
        if a single string is passed, no short caption will be set.
    label : str, optional
        The LaTeX label to be placed inside ``\label{}`` in the output.
    position : str, optional
        The LaTeX positional argument for tables, to be placed after
        ``\begin{}`` in the output.

    See Also
    --------
    HTMLFormatter
    """

    def __init__(
        self,
        formatter: DataFrameFormatter,
        longtable: bool = False,
        column_format: str | None = None,
        multicolumn: bool = False,
        multicolumn_format: str | None = None,
        multirow: bool = False,
        caption: str | tuple[str, str] | None = None,
        label: str | None = None,
        position: str | None = None,
    ) -> None:
        # fmt and frame must be set first: the column_format setter reads them.
        self.fmt = formatter
        self.frame = self.fmt.frame
        self.longtable = longtable
        self.column_format = column_format
        self.multicolumn = multicolumn
        self.multicolumn_format = multicolumn_format
        self.multirow = multirow
        self.caption, self.short_caption = _split_into_full_short_caption(caption)
        self.label = label
        self.position = position

    def to_string(self) -> str:
        """
        Render a DataFrame to a LaTeX tabular, longtable, or table/tabular
        environment output.
        """
        return self.builder.get_result()

    @property
    def builder(self) -> TableBuilderAbstract:
        """Concrete table builder.

        Returns
        -------
        TableBuilder
        """
        builder_cls = self._select_builder()
        options = {
            "formatter": self.fmt,
            "column_format": self.column_format,
            "multicolumn": self.multicolumn,
            "multicolumn_format": self.multicolumn_format,
            "multirow": self.multirow,
            "caption": self.caption,
            "short_caption": self.short_caption,
            "label": self.label,
            "position": self.position,
        }
        return builder_cls(**options)

    def _select_builder(self) -> type[TableBuilderAbstract]:
        """Select proper table builder."""
        if self.longtable:
            return LongTableBuilder
        # A caption, label or position selects the regular table builder.
        if self.caption or self.label or self.position:
            return RegularTableBuilder
        return TabularBuilder

    @property
    def column_format(self) -> str | None:
        """Column format."""
        return self._column_format

    @column_format.setter
    def column_format(self, input_column_format: str | None) -> None:
        """Setter for column format.

        ``None`` derives the format from the index levels and the column
        dtypes.  Any other non-string value raises ``ValueError``.
        """
        if input_column_format is None:
            index_part = self._get_index_format()
            self._column_format = (
                index_part + self._get_column_format_based_on_dtypes()
            )
            return
        if isinstance(input_column_format, str):
            self._column_format = input_column_format
            return
        raise ValueError(
            f"column_format must be str or unicode, "
            f"not {type(input_column_format)}"
        )

    def _get_column_format_based_on_dtypes(self) -> str:
        """Get column format based on data type.

        Right alignment for numbers and left - for strings.
        """
        return "".join(
            "r" if issubclass(dtype.type, np.number) else "l"
            for dtype in self.frame.dtypes._values
        )

    def _get_index_format(self) -> str:
        """Get index column format: one 'l' per index level, if index is shown."""
        if not self.fmt.index:
            return ""
        return "l" * self.frame.index.nlevels
- def _escape_symbols(row: Sequence[str]) -> list[str]:
- """Carry out string replacements for special symbols.
- Parameters
- ----------
- row : list
- List of string, that may contain special symbols.
- Returns
- -------
- list
- list of strings with the special symbols replaced.
- """
- return [
- (
- x.replace("\\", "\\textbackslash ")
- .replace("_", "\\_")
- .replace("%", "\\%")
- .replace("$", "\\$")
- .replace("#", "\\#")
- .replace("{", "\\{")
- .replace("}", "\\}")
- .replace("~", "\\textasciitilde ")
- .replace("^", "\\textasciicircum ")
- .replace("&", "\\&")
- if (x and x != "{}")
- else "{}"
- )
- for x in row
- ]
- def _convert_to_bold(crow: Sequence[str], ilevels: int) -> list[str]:
- """Convert elements in ``crow`` to bold."""
- return [
- f"\\textbf{{{x}}}" if j < ilevels and x.strip() not in ["", "{}"] else x
- for j, x in enumerate(crow)
- ]
if __name__ == "__main__":
    # Run the doctests of this module when it is executed as a script.
    from doctest import testmod

    testmod()
|