string.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207
  1. """
Module for formatting output data for the console (to string).
  3. """
  4. from __future__ import annotations
  5. from shutil import get_terminal_size
  6. from typing import (
  7. TYPE_CHECKING,
  8. Iterable,
  9. )
  10. import numpy as np
  11. from pandas.io.formats.printing import pprint_thing
  12. if TYPE_CHECKING:
  13. from pandas.io.formats.format import DataFrameFormatter
  14. class StringFormatter:
  15. """Formatter for string representation of a dataframe."""
  16. def __init__(self, fmt: DataFrameFormatter, line_width: int | None = None) -> None:
  17. self.fmt = fmt
  18. self.adj = fmt.adj
  19. self.frame = fmt.frame
  20. self.line_width = line_width
  21. def to_string(self) -> str:
  22. text = self._get_string_representation()
  23. if self.fmt.should_show_dimensions:
  24. text = "".join([text, self.fmt.dimensions_info])
  25. return text
  26. def _get_strcols(self) -> list[list[str]]:
  27. strcols = self.fmt.get_strcols()
  28. if self.fmt.is_truncated:
  29. strcols = self._insert_dot_separators(strcols)
  30. return strcols
  31. def _get_string_representation(self) -> str:
  32. if self.fmt.frame.empty:
  33. return self._empty_info_line
  34. strcols = self._get_strcols()
  35. if self.line_width is None:
  36. # no need to wrap around just print the whole frame
  37. return self.adj.adjoin(1, *strcols)
  38. if self._need_to_wrap_around:
  39. return self._join_multiline(strcols)
  40. return self._fit_strcols_to_terminal_width(strcols)
  41. @property
  42. def _empty_info_line(self) -> str:
  43. return (
  44. f"Empty {type(self.frame).__name__}\n"
  45. f"Columns: {pprint_thing(self.frame.columns)}\n"
  46. f"Index: {pprint_thing(self.frame.index)}"
  47. )
  48. @property
  49. def _need_to_wrap_around(self) -> bool:
  50. return bool(self.fmt.max_cols is None or self.fmt.max_cols > 0)
  51. def _insert_dot_separators(self, strcols: list[list[str]]) -> list[list[str]]:
  52. str_index = self.fmt._get_formatted_index(self.fmt.tr_frame)
  53. index_length = len(str_index)
  54. if self.fmt.is_truncated_horizontally:
  55. strcols = self._insert_dot_separator_horizontal(strcols, index_length)
  56. if self.fmt.is_truncated_vertically:
  57. strcols = self._insert_dot_separator_vertical(strcols, index_length)
  58. return strcols
  59. @property
  60. def _adjusted_tr_col_num(self) -> int:
  61. return self.fmt.tr_col_num + 1 if self.fmt.index else self.fmt.tr_col_num
  62. def _insert_dot_separator_horizontal(
  63. self, strcols: list[list[str]], index_length: int
  64. ) -> list[list[str]]:
  65. strcols.insert(self._adjusted_tr_col_num, [" ..."] * index_length)
  66. return strcols
  67. def _insert_dot_separator_vertical(
  68. self, strcols: list[list[str]], index_length: int
  69. ) -> list[list[str]]:
  70. n_header_rows = index_length - len(self.fmt.tr_frame)
  71. row_num = self.fmt.tr_row_num
  72. for ix, col in enumerate(strcols):
  73. cwidth = self.adj.len(col[row_num])
  74. if self.fmt.is_truncated_horizontally:
  75. is_dot_col = ix == self._adjusted_tr_col_num
  76. else:
  77. is_dot_col = False
  78. if cwidth > 3 or is_dot_col:
  79. dots = "..."
  80. else:
  81. dots = ".."
  82. if ix == 0 and self.fmt.index:
  83. dot_mode = "left"
  84. elif is_dot_col:
  85. cwidth = 4
  86. dot_mode = "right"
  87. else:
  88. dot_mode = "right"
  89. dot_str = self.adj.justify([dots], cwidth, mode=dot_mode)[0]
  90. col.insert(row_num + n_header_rows, dot_str)
  91. return strcols
  92. def _join_multiline(self, strcols_input: Iterable[list[str]]) -> str:
  93. lwidth = self.line_width
  94. adjoin_width = 1
  95. strcols = list(strcols_input)
  96. if self.fmt.index:
  97. idx = strcols.pop(0)
  98. lwidth -= np.array([self.adj.len(x) for x in idx]).max() + adjoin_width
  99. col_widths = [
  100. np.array([self.adj.len(x) for x in col]).max() if len(col) > 0 else 0
  101. for col in strcols
  102. ]
  103. assert lwidth is not None
  104. col_bins = _binify(col_widths, lwidth)
  105. nbins = len(col_bins)
  106. str_lst = []
  107. start = 0
  108. for i, end in enumerate(col_bins):
  109. row = strcols[start:end]
  110. if self.fmt.index:
  111. row.insert(0, idx)
  112. if nbins > 1:
  113. nrows = len(row[-1])
  114. if end <= len(strcols) and i < nbins - 1:
  115. row.append([" \\"] + [" "] * (nrows - 1))
  116. else:
  117. row.append([" "] * nrows)
  118. str_lst.append(self.adj.adjoin(adjoin_width, *row))
  119. start = end
  120. return "\n\n".join(str_lst)
  121. def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str:
  122. from pandas import Series
  123. lines = self.adj.adjoin(1, *strcols).split("\n")
  124. max_len = Series(lines).str.len().max()
  125. # plus truncate dot col
  126. width, _ = get_terminal_size()
  127. dif = max_len - width
  128. # '+ 1' to avoid too wide repr (GH PR #17023)
  129. adj_dif = dif + 1
  130. col_lens = Series([Series(ele).apply(len).max() for ele in strcols])
  131. n_cols = len(col_lens)
  132. counter = 0
  133. while adj_dif > 0 and n_cols > 1:
  134. counter += 1
  135. mid = round(n_cols / 2)
  136. mid_ix = col_lens.index[mid]
  137. col_len = col_lens[mid_ix]
  138. # adjoin adds one
  139. adj_dif -= col_len + 1
  140. col_lens = col_lens.drop(mid_ix)
  141. n_cols = len(col_lens)
  142. # subtract index column
  143. max_cols_fitted = n_cols - self.fmt.index
  144. # GH-21180. Ensure that we print at least two.
  145. max_cols_fitted = max(max_cols_fitted, 2)
  146. self.fmt.max_cols_fitted = max_cols_fitted
  147. # Call again _truncate to cut frame appropriately
  148. # and then generate string representation
  149. self.fmt.truncate()
  150. strcols = self._get_strcols()
  151. return self.adj.adjoin(1, *strcols)
  152. def _binify(cols: list[int], line_width: int) -> list[int]:
  153. adjoin_width = 1
  154. bins = []
  155. curr_width = 0
  156. i_last_column = len(cols) - 1
  157. for i, w in enumerate(cols):
  158. w_adjoined = w + adjoin_width
  159. curr_width += w_adjoined
  160. if i_last_column == i:
  161. wrap = curr_width + 1 > line_width and i > 0
  162. else:
  163. wrap = curr_width + 2 > line_width and i > 0
  164. if wrap:
  165. bins.append(i)
  166. curr_width = w_adjoined
  167. bins.append(len(cols))
  168. return bins