printing.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504
  1. """
  2. Printing tools.
  3. """
  4. from __future__ import annotations
  5. import sys
  6. from typing import (
  7. Any,
  8. Callable,
  9. Dict,
  10. Iterable,
  11. Mapping,
  12. Sequence,
  13. TypeVar,
  14. Union,
  15. )
  16. from pandas._config import get_option
  17. from pandas.core.dtypes.inference import is_sequence
# Accepted forms for ``escape_chars`` in pprint_thing(): either a mapping of
# character -> replacement text, or a plain iterable of characters to escape.
EscapeChars = Union[Mapping[str, str], Iterable[str]]
# Key / value type variables used to parametrize PrettyDict.
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")
  21. def adjoin(space: int, *lists: list[str], **kwargs) -> str:
  22. """
  23. Glues together two sets of strings using the amount of space requested.
  24. The idea is to prettify.
  25. ----------
  26. space : int
  27. number of spaces for padding
  28. lists : str
  29. list of str which being joined
  30. strlen : callable
  31. function used to calculate the length of each str. Needed for unicode
  32. handling.
  33. justfunc : callable
  34. function used to justify str. Needed for unicode handling.
  35. """
  36. strlen = kwargs.pop("strlen", len)
  37. justfunc = kwargs.pop("justfunc", justify)
  38. out_lines = []
  39. newLists = []
  40. lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
  41. # not the last one
  42. lengths.append(max(map(len, lists[-1])))
  43. maxLen = max(map(len, lists))
  44. for i, lst in enumerate(lists):
  45. nl = justfunc(lst, lengths[i], mode="left")
  46. nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
  47. newLists.append(nl)
  48. toJoin = zip(*newLists)
  49. for lines in toJoin:
  50. out_lines.append("".join(lines))
  51. return "\n".join(out_lines)
  52. def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
  53. """
  54. Perform ljust, center, rjust against string or list-like
  55. """
  56. if mode == "left":
  57. return [x.ljust(max_len) for x in texts]
  58. elif mode == "center":
  59. return [x.center(max_len) for x in texts]
  60. else:
  61. return [x.rjust(max_len) for x in texts]
  62. # Unicode consolidation
  63. # ---------------------
  64. #
  65. # pprinting utility functions for generating Unicode text or
  66. # bytes(3.x)/str(2.x) representations of objects.
  67. # Try to use these as much as possible rather than rolling your own.
  68. #
  69. # When to use
  70. # -----------
  71. #
  72. # 1) If you're writing code internal to pandas (no I/O directly involved),
  73. # use pprint_thing().
  74. #
  75. # It will always return unicode text which can be handled by other
  76. # parts of the package without breakage.
  77. #
  78. # 2) if you need to write something out to file, use
  79. # pprint_thing_encoded(encoding).
  80. #
  81. # If no encoding is specified, it defaults to utf-8. Since encoding pure
  82. # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
  83. # working with straight ascii.
  84. def _pprint_seq(
  85. seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
  86. ) -> str:
  87. """
  88. internal. pprinter for iterables. you should probably use pprint_thing()
  89. rather than calling this directly.
  90. bounds length of printed sequence, depending on options
  91. """
  92. if isinstance(seq, set):
  93. fmt = "{{{body}}}"
  94. else:
  95. fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
  96. if max_seq_items is False:
  97. nitems = len(seq)
  98. else:
  99. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  100. s = iter(seq)
  101. # handle sets, no slicing
  102. r = [
  103. pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
  104. for i in range(min(nitems, len(seq)))
  105. ]
  106. body = ", ".join(r)
  107. if nitems < len(seq):
  108. body += ", ..."
  109. elif isinstance(seq, tuple) and len(seq) == 1:
  110. body += ","
  111. return fmt.format(body=body)
  112. def _pprint_dict(
  113. seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
  114. ) -> str:
  115. """
  116. internal. pprinter for iterables. you should probably use pprint_thing()
  117. rather than calling this directly.
  118. """
  119. fmt = "{{{things}}}"
  120. pairs = []
  121. pfmt = "{key}: {val}"
  122. if max_seq_items is False:
  123. nitems = len(seq)
  124. else:
  125. nitems = max_seq_items or get_option("max_seq_items") or len(seq)
  126. for k, v in list(seq.items())[:nitems]:
  127. pairs.append(
  128. pfmt.format(
  129. key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
  130. val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
  131. )
  132. )
  133. if nitems < len(seq):
  134. return fmt.format(things=", ".join(pairs) + ", ...")
  135. else:
  136. return fmt.format(things=", ".join(pairs))
  137. def pprint_thing(
  138. thing: Any,
  139. _nest_lvl: int = 0,
  140. escape_chars: EscapeChars | None = None,
  141. default_escapes: bool = False,
  142. quote_strings: bool = False,
  143. max_seq_items: int | None = None,
  144. ) -> str:
  145. """
  146. This function is the sanctioned way of converting objects
  147. to a string representation and properly handles nested sequences.
  148. Parameters
  149. ----------
  150. thing : anything to be formatted
  151. _nest_lvl : internal use only. pprint_thing() is mutually-recursive
  152. with pprint_sequence, this argument is used to keep track of the
  153. current nesting level, and limit it.
  154. escape_chars : list or dict, optional
  155. Characters to escape. If a dict is passed the values are the
  156. replacements
  157. default_escapes : bool, default False
  158. Whether the input escape characters replaces or adds to the defaults
  159. max_seq_items : int or None, default None
  160. Pass through to other pretty printers to limit sequence printing
  161. Returns
  162. -------
  163. str
  164. """
  165. def as_escaped_string(
  166. thing: Any, escape_chars: EscapeChars | None = escape_chars
  167. ) -> str:
  168. translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
  169. if isinstance(escape_chars, dict):
  170. if default_escapes:
  171. translate.update(escape_chars)
  172. else:
  173. translate = escape_chars
  174. escape_chars = list(escape_chars.keys())
  175. else:
  176. escape_chars = escape_chars or ()
  177. result = str(thing)
  178. for c in escape_chars:
  179. result = result.replace(c, translate[c])
  180. return result
  181. if hasattr(thing, "__next__"):
  182. return str(thing)
  183. elif isinstance(thing, dict) and _nest_lvl < get_option(
  184. "display.pprint_nest_depth"
  185. ):
  186. result = _pprint_dict(
  187. thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
  188. )
  189. elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
  190. result = _pprint_seq(
  191. thing,
  192. _nest_lvl,
  193. escape_chars=escape_chars,
  194. quote_strings=quote_strings,
  195. max_seq_items=max_seq_items,
  196. )
  197. elif isinstance(thing, str) and quote_strings:
  198. result = f"'{as_escaped_string(thing)}'"
  199. else:
  200. result = as_escaped_string(thing)
  201. return result
  202. def pprint_thing_encoded(
  203. object, encoding: str = "utf-8", errors: str = "replace"
  204. ) -> bytes:
  205. value = pprint_thing(object) # get unicode representation of object
  206. return value.encode(encoding, errors)
  207. def enable_data_resource_formatter(enable: bool) -> None:
  208. if "IPython" not in sys.modules:
  209. # definitely not in IPython
  210. return
  211. from IPython import get_ipython
  212. ip = get_ipython()
  213. if ip is None:
  214. # still not in IPython
  215. return
  216. formatters = ip.display_formatter.formatters
  217. mimetype = "application/vnd.dataresource+json"
  218. if enable:
  219. if mimetype not in formatters:
  220. # define tableschema formatter
  221. from IPython.core.formatters import BaseFormatter
  222. from traitlets import ObjectName
  223. class TableSchemaFormatter(BaseFormatter):
  224. print_method = ObjectName("_repr_data_resource_")
  225. _return_type = (dict,)
  226. # register it:
  227. formatters[mimetype] = TableSchemaFormatter()
  228. # enable it if it's been disabled:
  229. formatters[mimetype].enabled = True
  230. else:
  231. # unregister tableschema mime-type
  232. if mimetype in formatters:
  233. formatters[mimetype].enabled = False
  234. def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
  235. return pprint_thing(
  236. thing,
  237. escape_chars=("\t", "\r", "\n"),
  238. quote_strings=True,
  239. max_seq_items=max_seq_items,
  240. )
  241. def format_object_summary(
  242. obj,
  243. formatter: Callable,
  244. is_justify: bool = True,
  245. name: str | None = None,
  246. indent_for_name: bool = True,
  247. line_break_each_value: bool = False,
  248. ) -> str:
  249. """
  250. Return the formatted obj as a unicode string
  251. Parameters
  252. ----------
  253. obj : object
  254. must be iterable and support __getitem__
  255. formatter : callable
  256. string formatter for an element
  257. is_justify : bool
  258. should justify the display
  259. name : name, optional
  260. defaults to the class name of the obj
  261. indent_for_name : bool, default True
  262. Whether subsequent lines should be indented to
  263. align with the name.
  264. line_break_each_value : bool, default False
  265. If True, inserts a line break for each value of ``obj``.
  266. If False, only break lines when the a line of values gets wider
  267. than the display width.
  268. Returns
  269. -------
  270. summary string
  271. """
  272. from pandas.io.formats.console import get_console_size
  273. from pandas.io.formats.format import get_adjustment
  274. display_width, _ = get_console_size()
  275. if display_width is None:
  276. display_width = get_option("display.width") or 80
  277. if name is None:
  278. name = type(obj).__name__
  279. if indent_for_name:
  280. name_len = len(name)
  281. space1 = f'\n{(" " * (name_len + 1))}'
  282. space2 = f'\n{(" " * (name_len + 2))}'
  283. else:
  284. space1 = "\n"
  285. space2 = "\n " # space for the opening '['
  286. n = len(obj)
  287. if line_break_each_value:
  288. # If we want to vertically align on each value of obj, we need to
  289. # separate values by a line break and indent the values
  290. sep = ",\n " + " " * len(name)
  291. else:
  292. sep = ","
  293. max_seq_items = get_option("display.max_seq_items") or n
  294. # are we a truncated display
  295. is_truncated = n > max_seq_items
  296. # adj can optionally handle unicode eastern asian width
  297. adj = get_adjustment()
  298. def _extend_line(
  299. s: str, line: str, value: str, display_width: int, next_line_prefix: str
  300. ) -> tuple[str, str]:
  301. if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
  302. s += line.rstrip()
  303. line = next_line_prefix
  304. line += value
  305. return s, line
  306. def best_len(values: list[str]) -> int:
  307. if values:
  308. return max(adj.len(x) for x in values)
  309. else:
  310. return 0
  311. close = ", "
  312. if n == 0:
  313. summary = f"[]{close}"
  314. elif n == 1 and not line_break_each_value:
  315. first = formatter(obj[0])
  316. summary = f"[{first}]{close}"
  317. elif n == 2 and not line_break_each_value:
  318. first = formatter(obj[0])
  319. last = formatter(obj[-1])
  320. summary = f"[{first}, {last}]{close}"
  321. else:
  322. if max_seq_items == 1:
  323. # If max_seq_items=1 show only last element
  324. head = []
  325. tail = [formatter(x) for x in obj[-1:]]
  326. elif n > max_seq_items:
  327. n = min(max_seq_items // 2, 10)
  328. head = [formatter(x) for x in obj[:n]]
  329. tail = [formatter(x) for x in obj[-n:]]
  330. else:
  331. head = []
  332. tail = [formatter(x) for x in obj]
  333. # adjust all values to max length if needed
  334. if is_justify:
  335. if line_break_each_value:
  336. # Justify each string in the values of head and tail, so the
  337. # strings will right align when head and tail are stacked
  338. # vertically.
  339. head, tail = _justify(head, tail)
  340. elif is_truncated or not (
  341. len(", ".join(head)) < display_width
  342. and len(", ".join(tail)) < display_width
  343. ):
  344. # Each string in head and tail should align with each other
  345. max_length = max(best_len(head), best_len(tail))
  346. head = [x.rjust(max_length) for x in head]
  347. tail = [x.rjust(max_length) for x in tail]
  348. # If we are not truncated and we are only a single
  349. # line, then don't justify
  350. if line_break_each_value:
  351. # Now head and tail are of type List[Tuple[str]]. Below we
  352. # convert them into List[str], so there will be one string per
  353. # value. Also truncate items horizontally if wider than
  354. # max_space
  355. max_space = display_width - len(space2)
  356. value = tail[0]
  357. for max_items in reversed(range(1, len(value) + 1)):
  358. pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
  359. if len(pprinted_seq) < max_space:
  360. head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
  361. tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
  362. break
  363. summary = ""
  364. line = space2
  365. for head_value in head:
  366. word = head_value + sep + " "
  367. summary, line = _extend_line(summary, line, word, display_width, space2)
  368. if is_truncated:
  369. # remove trailing space of last line
  370. summary += line.rstrip() + space2 + "..."
  371. line = space2
  372. for tail_item in tail[:-1]:
  373. word = tail_item + sep + " "
  374. summary, line = _extend_line(summary, line, word, display_width, space2)
  375. # last value: no sep added + 1 space of width used for trailing ','
  376. summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
  377. summary += line
  378. # right now close is either '' or ', '
  379. # Now we want to include the ']', but not the maybe space.
  380. close = "]" + close.rstrip(" ")
  381. summary += close
  382. if len(summary) > (display_width) or line_break_each_value:
  383. summary += space1
  384. else: # one row
  385. summary += " "
  386. # remove initial space
  387. summary = "[" + summary[len(space2) :]
  388. return summary
  389. def _justify(
  390. head: list[Sequence[str]], tail: list[Sequence[str]]
  391. ) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
  392. """
  393. Justify items in head and tail, so they are right-aligned when stacked.
  394. Parameters
  395. ----------
  396. head : list-like of list-likes of strings
  397. tail : list-like of list-likes of strings
  398. Returns
  399. -------
  400. tuple of list of tuples of strings
  401. Same as head and tail, but items are right aligned when stacked
  402. vertically.
  403. Examples
  404. --------
  405. >>> _justify([['a', 'b']], [['abc', 'abcd']])
  406. ([(' a', ' b')], [('abc', 'abcd')])
  407. """
  408. combined = head + tail
  409. # For each position for the sequences in ``combined``,
  410. # find the length of the largest string.
  411. max_length = [0] * len(combined[0])
  412. for inner_seq in combined:
  413. length = [len(item) for item in inner_seq]
  414. max_length = [max(x, y) for x, y in zip(max_length, length)]
  415. # justify each item in each list-like in head and tail using max_length
  416. head_tuples = [
  417. tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
  418. ]
  419. tail_tuples = [
  420. tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
  421. ]
  422. return head_tuples, tail_tuples
  423. class PrettyDict(Dict[_KT, _VT]):
  424. """Dict extension to support abbreviated __repr__"""
  425. def __repr__(self) -> str:
  426. return pprint_thing(self)