123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504 |
- """
- Printing tools.
- """
- from __future__ import annotations
- import sys
- from typing import (
- Any,
- Callable,
- Dict,
- Iterable,
- Mapping,
- Sequence,
- TypeVar,
- Union,
- )
- from pandas._config import get_option
- from pandas.core.dtypes.inference import is_sequence
- EscapeChars = Union[Mapping[str, str], Iterable[str]]
- _KT = TypeVar("_KT")
- _VT = TypeVar("_VT")
- def adjoin(space: int, *lists: list[str], **kwargs) -> str:
- """
- Glues together two sets of strings using the amount of space requested.
- The idea is to prettify.
- ----------
- space : int
- number of spaces for padding
- lists : str
- list of str which being joined
- strlen : callable
- function used to calculate the length of each str. Needed for unicode
- handling.
- justfunc : callable
- function used to justify str. Needed for unicode handling.
- """
- strlen = kwargs.pop("strlen", len)
- justfunc = kwargs.pop("justfunc", justify)
- out_lines = []
- newLists = []
- lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
- # not the last one
- lengths.append(max(map(len, lists[-1])))
- maxLen = max(map(len, lists))
- for i, lst in enumerate(lists):
- nl = justfunc(lst, lengths[i], mode="left")
- nl = ([" " * lengths[i]] * (maxLen - len(lst))) + nl
- newLists.append(nl)
- toJoin = zip(*newLists)
- for lines in toJoin:
- out_lines.append("".join(lines))
- return "\n".join(out_lines)
- def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
- """
- Perform ljust, center, rjust against string or list-like
- """
- if mode == "left":
- return [x.ljust(max_len) for x in texts]
- elif mode == "center":
- return [x.center(max_len) for x in texts]
- else:
- return [x.rjust(max_len) for x in texts]
- # Unicode consolidation
- # ---------------------
- #
- # pprinting utility functions for generating Unicode text or
- # bytes(3.x)/str(2.x) representations of objects.
- # Try to use these as much as possible rather than rolling your own.
- #
- # When to use
- # -----------
- #
- # 1) If you're writing code internal to pandas (no I/O directly involved),
- # use pprint_thing().
- #
- # It will always return unicode text which can handled by other
- # parts of the package without breakage.
- #
- # 2) if you need to write something out to file, use
- # pprint_thing_encoded(encoding).
- #
- # If no encoding is specified, it defaults to utf-8. Since encoding pure
- # ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
- # working with straight ascii.
- def _pprint_seq(
- seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
- ) -> str:
- """
- internal. pprinter for iterables. you should probably use pprint_thing()
- rather than calling this directly.
- bounds length of printed sequence, depending on options
- """
- if isinstance(seq, set):
- fmt = "{{{body}}}"
- else:
- fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"
- if max_seq_items is False:
- nitems = len(seq)
- else:
- nitems = max_seq_items or get_option("max_seq_items") or len(seq)
- s = iter(seq)
- # handle sets, no slicing
- r = [
- pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
- for i in range(min(nitems, len(seq)))
- ]
- body = ", ".join(r)
- if nitems < len(seq):
- body += ", ..."
- elif isinstance(seq, tuple) and len(seq) == 1:
- body += ","
- return fmt.format(body=body)
- def _pprint_dict(
- seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
- ) -> str:
- """
- internal. pprinter for iterables. you should probably use pprint_thing()
- rather than calling this directly.
- """
- fmt = "{{{things}}}"
- pairs = []
- pfmt = "{key}: {val}"
- if max_seq_items is False:
- nitems = len(seq)
- else:
- nitems = max_seq_items or get_option("max_seq_items") or len(seq)
- for k, v in list(seq.items())[:nitems]:
- pairs.append(
- pfmt.format(
- key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
- val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
- )
- )
- if nitems < len(seq):
- return fmt.format(things=", ".join(pairs) + ", ...")
- else:
- return fmt.format(things=", ".join(pairs))
- def pprint_thing(
- thing: Any,
- _nest_lvl: int = 0,
- escape_chars: EscapeChars | None = None,
- default_escapes: bool = False,
- quote_strings: bool = False,
- max_seq_items: int | None = None,
- ) -> str:
- """
- This function is the sanctioned way of converting objects
- to a string representation and properly handles nested sequences.
- Parameters
- ----------
- thing : anything to be formatted
- _nest_lvl : internal use only. pprint_thing() is mutually-recursive
- with pprint_sequence, this argument is used to keep track of the
- current nesting level, and limit it.
- escape_chars : list or dict, optional
- Characters to escape. If a dict is passed the values are the
- replacements
- default_escapes : bool, default False
- Whether the input escape characters replaces or adds to the defaults
- max_seq_items : int or None, default None
- Pass through to other pretty printers to limit sequence printing
- Returns
- -------
- str
- """
- def as_escaped_string(
- thing: Any, escape_chars: EscapeChars | None = escape_chars
- ) -> str:
- translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
- if isinstance(escape_chars, dict):
- if default_escapes:
- translate.update(escape_chars)
- else:
- translate = escape_chars
- escape_chars = list(escape_chars.keys())
- else:
- escape_chars = escape_chars or ()
- result = str(thing)
- for c in escape_chars:
- result = result.replace(c, translate[c])
- return result
- if hasattr(thing, "__next__"):
- return str(thing)
- elif isinstance(thing, dict) and _nest_lvl < get_option(
- "display.pprint_nest_depth"
- ):
- result = _pprint_dict(
- thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
- )
- elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
- result = _pprint_seq(
- thing,
- _nest_lvl,
- escape_chars=escape_chars,
- quote_strings=quote_strings,
- max_seq_items=max_seq_items,
- )
- elif isinstance(thing, str) and quote_strings:
- result = f"'{as_escaped_string(thing)}'"
- else:
- result = as_escaped_string(thing)
- return result
- def pprint_thing_encoded(
- object, encoding: str = "utf-8", errors: str = "replace"
- ) -> bytes:
- value = pprint_thing(object) # get unicode representation of object
- return value.encode(encoding, errors)
- def enable_data_resource_formatter(enable: bool) -> None:
- if "IPython" not in sys.modules:
- # definitely not in IPython
- return
- from IPython import get_ipython
- ip = get_ipython()
- if ip is None:
- # still not in IPython
- return
- formatters = ip.display_formatter.formatters
- mimetype = "application/vnd.dataresource+json"
- if enable:
- if mimetype not in formatters:
- # define tableschema formatter
- from IPython.core.formatters import BaseFormatter
- from traitlets import ObjectName
- class TableSchemaFormatter(BaseFormatter):
- print_method = ObjectName("_repr_data_resource_")
- _return_type = (dict,)
- # register it:
- formatters[mimetype] = TableSchemaFormatter()
- # enable it if it's been disabled:
- formatters[mimetype].enabled = True
- else:
- # unregister tableschema mime-type
- if mimetype in formatters:
- formatters[mimetype].enabled = False
- def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
- return pprint_thing(
- thing,
- escape_chars=("\t", "\r", "\n"),
- quote_strings=True,
- max_seq_items=max_seq_items,
- )
- def format_object_summary(
- obj,
- formatter: Callable,
- is_justify: bool = True,
- name: str | None = None,
- indent_for_name: bool = True,
- line_break_each_value: bool = False,
- ) -> str:
- """
- Return the formatted obj as a unicode string
- Parameters
- ----------
- obj : object
- must be iterable and support __getitem__
- formatter : callable
- string formatter for an element
- is_justify : bool
- should justify the display
- name : name, optional
- defaults to the class name of the obj
- indent_for_name : bool, default True
- Whether subsequent lines should be indented to
- align with the name.
- line_break_each_value : bool, default False
- If True, inserts a line break for each value of ``obj``.
- If False, only break lines when the a line of values gets wider
- than the display width.
- Returns
- -------
- summary string
- """
- from pandas.io.formats.console import get_console_size
- from pandas.io.formats.format import get_adjustment
- display_width, _ = get_console_size()
- if display_width is None:
- display_width = get_option("display.width") or 80
- if name is None:
- name = type(obj).__name__
- if indent_for_name:
- name_len = len(name)
- space1 = f'\n{(" " * (name_len + 1))}'
- space2 = f'\n{(" " * (name_len + 2))}'
- else:
- space1 = "\n"
- space2 = "\n " # space for the opening '['
- n = len(obj)
- if line_break_each_value:
- # If we want to vertically align on each value of obj, we need to
- # separate values by a line break and indent the values
- sep = ",\n " + " " * len(name)
- else:
- sep = ","
- max_seq_items = get_option("display.max_seq_items") or n
- # are we a truncated display
- is_truncated = n > max_seq_items
- # adj can optionally handle unicode eastern asian width
- adj = get_adjustment()
- def _extend_line(
- s: str, line: str, value: str, display_width: int, next_line_prefix: str
- ) -> tuple[str, str]:
- if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
- s += line.rstrip()
- line = next_line_prefix
- line += value
- return s, line
- def best_len(values: list[str]) -> int:
- if values:
- return max(adj.len(x) for x in values)
- else:
- return 0
- close = ", "
- if n == 0:
- summary = f"[]{close}"
- elif n == 1 and not line_break_each_value:
- first = formatter(obj[0])
- summary = f"[{first}]{close}"
- elif n == 2 and not line_break_each_value:
- first = formatter(obj[0])
- last = formatter(obj[-1])
- summary = f"[{first}, {last}]{close}"
- else:
- if max_seq_items == 1:
- # If max_seq_items=1 show only last element
- head = []
- tail = [formatter(x) for x in obj[-1:]]
- elif n > max_seq_items:
- n = min(max_seq_items // 2, 10)
- head = [formatter(x) for x in obj[:n]]
- tail = [formatter(x) for x in obj[-n:]]
- else:
- head = []
- tail = [formatter(x) for x in obj]
- # adjust all values to max length if needed
- if is_justify:
- if line_break_each_value:
- # Justify each string in the values of head and tail, so the
- # strings will right align when head and tail are stacked
- # vertically.
- head, tail = _justify(head, tail)
- elif is_truncated or not (
- len(", ".join(head)) < display_width
- and len(", ".join(tail)) < display_width
- ):
- # Each string in head and tail should align with each other
- max_length = max(best_len(head), best_len(tail))
- head = [x.rjust(max_length) for x in head]
- tail = [x.rjust(max_length) for x in tail]
- # If we are not truncated and we are only a single
- # line, then don't justify
- if line_break_each_value:
- # Now head and tail are of type List[Tuple[str]]. Below we
- # convert them into List[str], so there will be one string per
- # value. Also truncate items horizontally if wider than
- # max_space
- max_space = display_width - len(space2)
- value = tail[0]
- for max_items in reversed(range(1, len(value) + 1)):
- pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
- if len(pprinted_seq) < max_space:
- head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
- tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
- break
- summary = ""
- line = space2
- for head_value in head:
- word = head_value + sep + " "
- summary, line = _extend_line(summary, line, word, display_width, space2)
- if is_truncated:
- # remove trailing space of last line
- summary += line.rstrip() + space2 + "..."
- line = space2
- for tail_item in tail[:-1]:
- word = tail_item + sep + " "
- summary, line = _extend_line(summary, line, word, display_width, space2)
- # last value: no sep added + 1 space of width used for trailing ','
- summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
- summary += line
- # right now close is either '' or ', '
- # Now we want to include the ']', but not the maybe space.
- close = "]" + close.rstrip(" ")
- summary += close
- if len(summary) > (display_width) or line_break_each_value:
- summary += space1
- else: # one row
- summary += " "
- # remove initial space
- summary = "[" + summary[len(space2) :]
- return summary
- def _justify(
- head: list[Sequence[str]], tail: list[Sequence[str]]
- ) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
- """
- Justify items in head and tail, so they are right-aligned when stacked.
- Parameters
- ----------
- head : list-like of list-likes of strings
- tail : list-like of list-likes of strings
- Returns
- -------
- tuple of list of tuples of strings
- Same as head and tail, but items are right aligned when stacked
- vertically.
- Examples
- --------
- >>> _justify([['a', 'b']], [['abc', 'abcd']])
- ([(' a', ' b')], [('abc', 'abcd')])
- """
- combined = head + tail
- # For each position for the sequences in ``combined``,
- # find the length of the largest string.
- max_length = [0] * len(combined[0])
- for inner_seq in combined:
- length = [len(item) for item in inner_seq]
- max_length = [max(x, y) for x, y in zip(max_length, length)]
- # justify each item in each list-like in head and tail using max_length
- head_tuples = [
- tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
- ]
- tail_tuples = [
- tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
- ]
- return head_tuples, tail_tuples
- class PrettyDict(Dict[_KT, _VT]):
- """Dict extension to support abbreviated __repr__"""
- def __repr__(self) -> str:
- return pprint_thing(self)
|