123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- """
- Module for formatting output data in console (to string).
- """
- from __future__ import annotations
- from shutil import get_terminal_size
- from typing import (
- TYPE_CHECKING,
- Iterable,
- )
- import numpy as np
- from pandas.io.formats.printing import pprint_thing
- if TYPE_CHECKING:
- from pandas.io.formats.format import DataFrameFormatter
class StringFormatter:
    """
    Formatter for string representation of a dataframe.

    Parameters
    ----------
    fmt : DataFrameFormatter
        Formatter that supplies the frame, the text adjustment object
        (``fmt.adj``), the formatted string columns and the truncation state.
    line_width : int, optional
        Maximum width of one output line. When ``None`` all columns are
        adjoined into a single block without any wrapping.
    """

    def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
        self.fmt = fmt
        self.adj = fmt.adj
        self.frame = fmt.frame
        self.line_width = line_width

    def to_string(self) -> str:
        """Return the rendered frame, followed by the dimensions info if enabled."""
        text = self._get_string_representation()
        if self.fmt.should_show_dimensions:
            text = "".join([text, self.fmt.dimensions_info])
        return text

    def _get_strcols(self) -> list[list[str]]:
        """Return the formatted columns, with dot separators when truncated."""
        strcols = self.fmt.get_strcols()
        if self.fmt.is_truncated:
            strcols = self._insert_dot_separators(strcols)
        return strcols

    def _get_string_representation(self) -> str:
        """
        Build the text of the frame without the dimensions footer.

        An empty frame yields the "Empty ..." info lines. Otherwise the
        columns are adjoined as one block (no ``line_width``), wrapped into
        several blocks, or re-truncated to fit the terminal width.
        """
        if self.fmt.frame.empty:
            return self._empty_info_line

        strcols = self._get_strcols()

        if self.line_width is None:
            # no need to wrap around just print the whole frame
            return self.adj.adjoin(1, *strcols)

        if self._need_to_wrap_around:
            return self._join_multiline(strcols)

        return self._fit_strcols_to_terminal_width(strcols)

    @property
    def _empty_info_line(self) -> str:
        """Three-line description (type, columns, index) of an empty frame."""
        return (
            f"Empty {type(self.frame).__name__}\n"
            f"Columns: {pprint_thing(self.frame.columns)}\n"
            f"Index: {pprint_thing(self.frame.index)}"
        )

    @property
    def _need_to_wrap_around(self) -> bool:
        """Whether ``fmt.max_cols`` is unset (``None``) or positive."""
        return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)

    def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
        """
        Insert the horizontal and/or vertical truncation markers.

        ``strcols`` is modified in place and also returned.
        """
        str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
        index_length = len(str_index)

        if self.fmt.is_truncated_horizontally:
            strcols = self._insert_dot_separator_horizontal(strcols, index_length)

        if self.fmt.is_truncated_vertically:
            strcols = self._insert_dot_separator_vertical(strcols, index_length)

        return strcols

    @property
    def _adjusted_tr_col_num(self) -> int:
        """``fmt.tr_col_num`` shifted by one when the index column is shown."""
        return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num

    def _insert_dot_separator_horizontal(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """Insert a column of ``" ..."`` cells at the column truncation point."""
        strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
        return strcols

    def _insert_dot_separator_vertical(
        self, strcols: list[list[str]], index_length: int
    ) -> list[list[str]]:
        """
        Insert a row of dots into every column at the row truncation point.

        Each cell is justified to the width of the column's entry at
        ``fmt.tr_row_num``; columns of width 3 or less get ``".."`` instead
        of ``"..."``.
        """
        # rows of the formatted index that do not belong to the truncated frame
        n_header_rows = index_length - len(self.fmt.tr_frame)
        row_num = self.fmt.tr_row_num
        for ix, col in enumerate(strcols):
            cwidth = self.adj.len(col[row_num])

            if self.fmt.is_truncated_horizontally:
                is_dot_col = ix == self._adjusted_tr_col_num
            else:
                is_dot_col = False

            if cwidth > 3 or is_dot_col:
                dots = "..."
            else:
                dots = ".."

            if ix == 0 and self.fmt.index:
                dot_mode = "left"
            elif is_dot_col:
                # same width as the " ..." cells of the horizontal separator
                cwidth = 4
                dot_mode = "right"
            else:
                dot_mode = "right"

            dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
            col.insert(row_num + n_header_rows, dot_str)
        return strcols

    def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
        """
        Wrap the columns into several blocks that each fit ``line_width``.

        The index column (if shown) is repeated in front of every block, and
        blocks are separated by a blank line.
        """
        lwidth = self.line_width
        # Check before any arithmetic: a missing width used to fail with an
        # unrelated TypeError before this guard was reached.
        assert lwidth is not None
        adjoin_width = 1
        strcols = list(strcols_input)

        if self.fmt.index:
            idx = strcols.pop(0)
            # the index is repeated in every block, so reserve its width up front
            lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width

        col_widths = [
            np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
            for col in strcols
        ]

        col_bins = _binify(col_widths, lwidth)
        nbins = len(col_bins)

        str_lst = []
        start = 0
        for i, end in enumerate(col_bins):
            row = strcols[start:end]
            if self.fmt.index:
                row.insert(0, idx)
            if nbins > 1:
                nrows = len(row[-1])
                if end <= len(strcols) and i < nbins - 1:
                    # continuation marker on the first line of a non-final block
                    row.append([" \\"] + ["  "] * (nrows - 1))
                else:
                    row.append([" "] * nrows)
            str_lst.append(self.adj.adjoin(adjoin_width, *row))
            start = end
        return "\n\n".join(str_lst)

    def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
        """
        Re-truncate the frame so that its rendering fits the terminal width.

        Sets ``fmt.max_cols_fitted``, calls ``fmt.truncate()`` and returns
        the adjoined text of the re-generated columns.
        """
        from pandas import Series

        # NOTE(review): widths here are measured with plain ``len`` whereas the
        # other methods use ``self.adj.len`` - confirm this is intentional.
        lines = self.adj.adjoin(1, *strcols).split("\n")
        max_len = Series(lines).str.len().max()
        # plus truncate dot col
        width, _ = get_terminal_size()
        dif = max_len - width
        # '+ 1' to avoid too wide repr (GH PR #17023)
        adj_dif = dif + 1
        col_lens = Series([Series(ele).apply(len).max() for ele in strcols])
        n_cols = len(col_lens)
        # drop columns from the middle until the remainder fits
        while adj_dif > 0 and n_cols > 1:
            mid = round(n_cols / 2)
            mid_ix = col_lens.index[mid]
            col_len = col_lens[mid_ix]
            # adjoin adds one
            adj_dif -= col_len + 1
            col_lens = col_lens.drop(mid_ix)
            n_cols = len(col_lens)

        # subtract index column
        max_cols_fitted = n_cols - self.fmt.index
        # GH-21180. Ensure that we print at least two.
        max_cols_fitted = max(max_cols_fitted, 2)
        self.fmt.max_cols_fitted = max_cols_fitted

        # Call again _truncate to cut frame appropriately
        # and then generate string representation
        self.fmt.truncate()
        strcols = self._get_strcols()
        return self.adj.adjoin(1, *strcols)
def _binify(cols: list[int], line_width: int) -> list[int]:
    """
    Split column widths into consecutive groups that fit within a line.

    Parameters
    ----------
    cols : list of int
        Width of each column, in display order.
    line_width : int
        Width available for one group of columns.

    Returns
    -------
    list of int
        Exclusive end position of every group; the last entry is always
        ``len(cols)``.
    """
    separator = 1
    n_cols = len(cols)
    breaks: list[int] = []
    used = 0
    for pos, width in enumerate(cols):
        occupied = width + separator
        used += occupied
        # The final column needs one spare character, any other column two
        # (presumably room for the trailing continuation column - confirm
        # against the caller).
        slack = 1 if pos == n_cols - 1 else 2
        # The first column never starts a new group, however wide it is.
        if used + slack > line_width and pos > 0:
            breaks.append(pos)
            used = occupied
    breaks.append(n_cols)
    return breaks
|