"""Printing tools."""
from __future__ import annotations

import sys
from typing import (
    Any,
    Callable,
    Dict,
    Iterable,
    Mapping,
    Sequence,
    TypeVar,
    Union,
)

from pandas._config import get_option

from pandas.core.dtypes.inference import is_sequence

EscapeChars = Union[Mapping[str, str], Iterable[str]]
_KT = TypeVar("_KT")
_VT = TypeVar("_VT")


def adjoin(space: int, *lists: list[str], **kwargs) -> str:
    """
    Glue several columns of strings together side by side.

    Each positional list is one column. Columns are left-justified, every
    column but the last is padded to its widest entry plus ``space``, and
    shorter columns are padded at the top with blank cells so that all
    columns have the same number of rows.

    Parameters
    ----------
    space : int
        number of spaces for padding
    lists : str
        list of str which being joined
    strlen : callable
        function used to calculate the length of each str. Needed for unicode
        handling.
    justfunc : callable
        function used to justify str. Needed for unicode handling.

    Returns
    -------
    str
        The rows of the joined columns, separated by newlines.
    """
    strlen = kwargs.pop("strlen", len)
    justfunc = kwargs.pop("justfunc", justify)

    lengths = [max(map(strlen, x)) + space for x in lists[:-1]]
    # not the last one
    # NOTE(review): the last column is measured with ``len`` rather than
    # ``strlen``; this only affects trailing padding -- confirm it is intended.
    lengths.append(max(map(len, lists[-1])))
    max_len = max(map(len, lists))

    new_lists = []
    for lst, width in zip(lists, lengths):
        nl = justfunc(lst, width, mode="left")
        # Blank cells go *before* the values of a short column.
        nl = ([" " * width] * (max_len - len(lst))) + nl
        new_lists.append(nl)

    return "\n".join("".join(lines) for lines in zip(*new_lists))


def justify(texts: Iterable[str], max_len: int, mode: str = "right") -> list[str]:
    """
    Perform ljust, center, rjust against string or list-like

    ``mode`` selects ``"left"`` or ``"center"``; any other value
    right-justifies.
    """
    if mode == "left":
        return [x.ljust(max_len) for x in texts]
    elif mode == "center":
        return [x.center(max_len) for x in texts]
    else:
        return [x.rjust(max_len) for x in texts]


# Unicode consolidation
# ---------------------
#
# pprinting utility functions for generating Unicode text or
# bytes(3.x)/str(2.x) representations of objects.
# Try to use these as much as possible rather than rolling your own.
#
# When to use
# -----------
#
# 1) If you're writing code internal to pandas (no I/O directly involved),
#    use pprint_thing().
#
#    It will always return unicode text which can be handled by other
#    parts of the package without breakage.
#
# 2) if you need to write something out to file, use
#    pprint_thing_encoded(encoding).
#
#    If no encoding is specified, it defaults to utf-8. Since encoding pure
#    ascii with utf-8 is a no-op you can safely use the default utf-8 if you're
#    working with straight ascii.


def _pprint_seq(
    seq: Sequence, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
) -> str:
    """
    internal. pprinter for iterables. you should probably use pprint_thing()
    rather than calling this directly.

    bounds length of printed sequence, depending on options
    """
    # Sets print with braces, objects supporting item assignment with
    # brackets, everything else (e.g. tuples) with parentheses.
    if isinstance(seq, set):
        fmt = "{{{body}}}"
    else:
        fmt = "[{body}]" if hasattr(seq, "__setitem__") else "({body})"

    # ``max_seq_items=False`` disables truncation; ``None``/0 falls back to
    # the configured option and finally to the full length.
    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("display.max_seq_items") or len(seq)

    s = iter(seq)
    # handle sets, no slicing
    r = [
        pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
        for _ in range(min(nitems, len(seq)))
    ]
    body = ", ".join(r)

    if nitems < len(seq):
        body += ", ..."
    elif isinstance(seq, tuple) and len(seq) == 1:
        # Keep the trailing comma of a one-element tuple.
        body += ","

    return fmt.format(body=body)


def _pprint_dict(
    seq: Mapping, _nest_lvl: int = 0, max_seq_items: int | None = None, **kwds
) -> str:
    """
    internal. pprinter for mappings. you should probably use pprint_thing()
    rather than calling this directly.
    """
    fmt = "{{{things}}}"
    pairs = []

    pfmt = "{key}: {val}"

    # Same truncation rule as _pprint_seq.
    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("display.max_seq_items") or len(seq)

    for k, v in list(seq.items())[:nitems]:
        pairs.append(
            pfmt.format(
                key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
                val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds),
            )
        )

    if nitems < len(seq):
        return fmt.format(things=", ".join(pairs) + ", ...")
    else:
        return fmt.format(things=", ".join(pairs))


def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: EscapeChars | None = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: int | None = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with _pprint_seq and _pprint_dict, this argument is used to keep
        track of the current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults.
        Only consulted when ``escape_chars`` is a mapping: if True, the
        default escapes for tab, newline and carriage return are applied in
        addition to the supplied ones.
    quote_strings : bool, default False
        Whether to wrap ``str`` values in single quotes.
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def as_escaped_string(
        thing: Any, escape_chars: EscapeChars | None = escape_chars
    ) -> str:
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, Mapping):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = dict(escape_chars)
            # Iterate the *effective* table so that, with default_escapes,
            # the default characters are escaped as well as the supplied ones.
            chars: Iterable[str] = list(translate)
        else:
            chars = escape_chars or ()

        result = str(thing)
        for c in chars:
            result = result.replace(c, translate[c])
        return result

    if hasattr(thing, "__next__"):
        # Iterators are not consumed; fall back to their plain str().
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        # NOTE(review): escape_chars is not forwarded for dicts, and strings
        # inside dicts are always quoted -- confirm this is intended.
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            default_escapes=default_escapes,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = f"'{as_escaped_string(thing)}'"
    else:
        result = as_escaped_string(thing)

    return result


def pprint_thing_encoded(
    object, encoding: str = "utf-8", errors: str = "replace"
) -> bytes:
    """Return ``pprint_thing(object)`` encoded to bytes with ``encoding``."""
    value = pprint_thing(object)  # get unicode representation of object
    return value.encode(encoding, errors)


def enable_data_resource_formatter(enable: bool) -> None:
    """
    Enable or disable the ``application/vnd.dataresource+json`` IPython
    display formatter.

    Does nothing when IPython has not been imported or no IPython shell is
    active. When enabling, the formatter is registered on first use; when
    disabling, an existing formatter is switched off but left registered.
    """
    if "IPython" not in sys.modules:
        # definitely not in IPython
        return
    from IPython import get_ipython

    ip = get_ipython()
    if ip is None:
        # still not in IPython
        return

    formatters = ip.display_formatter.formatters
    mimetype = "application/vnd.dataresource+json"

    if enable:
        if mimetype not in formatters:
            # define tableschema formatter
            from IPython.core.formatters import BaseFormatter
            from traitlets import ObjectName

            class TableSchemaFormatter(BaseFormatter):
                print_method = ObjectName("_repr_data_resource_")
                _return_type = (dict,)

            # register it:
            formatters[mimetype] = TableSchemaFormatter()
        # enable it if it's been disabled:
        formatters[mimetype].enabled = True
    else:
        # unregister tableschema mime-type
        if mimetype in formatters:
            formatters[mimetype].enabled = False


def default_pprint(thing: Any, max_seq_items: int | None = None) -> str:
    """
    Pretty-print ``thing`` with strings quoted and tab, carriage return and
    newline escaped.
    """
    return pprint_thing(
        thing,
        escape_chars=("\t", "\r", "\n"),
        quote_strings=True,
        max_seq_items=max_seq_items,
    )


def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: str | None = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : bool
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when the a line of values gets wider
        than the display width.

    Returns
    -------
    summary string
    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    # space1 is appended after the closing bracket of a multi-line summary,
    # space2 prefixes every continuation line of values.
    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> tuple[str, str]:
        # Flush ``line`` into ``s`` and start a new one when adding ``value``
        # would reach the display width.
        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: list[str]) -> int:
        # Widest entry as measured by ``adj``; 0 for an empty list.
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:
        if max_seq_items == 1:
            # If max_seq_items=1 show only last element
            head = []
            tail = [formatter(x) for x in obj[-1:]]
        elif n > max_seq_items:
            # Truncated: show the same number of items at each end.
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            for max_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
                if len(pprinted_seq) < max_space:
                    head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
                    tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]
                    break

        summary = ""
        line = space2

        for head_value in head:
            word = head_value + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for tail_item in tail[:-1]:
            word = tail_item + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space
        summary = "[" + summary[len(space2) :]

    return summary


def _justify(
    head: list[Sequence[str]], tail: list[Sequence[str]]
) -> tuple[list[tuple[str, ...]], list[tuple[str, ...]]]:
    """
    Justify items in head and tail, so they are right-aligned when stacked.

    Parameters
    ----------
    head : list-like of list-likes of strings
    tail : list-like of list-likes of strings

    Returns
    -------
    tuple of list of tuples of strings
        Same as head and tail, but items are right aligned when stacked
        vertically.

    Examples
    --------
    >>> _justify([['a', 'b']], [['abc', 'abcd']])
    ([('  a', '   b')], [('abc', 'abcd')])
    """
    combined = head + tail
    if not combined:
        # Nothing to measure or justify.
        return [], []

    # For each position for the sequences in ``combined``,
    # find the length of the largest string.
    max_length = [0] * len(combined[0])
    for inner_seq in combined:
        length = [len(item) for item in inner_seq]
        max_length = [max(x, y) for x, y in zip(max_length, length)]

    # justify each item in each list-like in head and tail using max_length
    head_tuples = [
        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in head
    ]
    tail_tuples = [
        tuple(x.rjust(max_len) for x, max_len in zip(seq, max_length)) for seq in tail
    ]
    return head_tuples, tail_tuples


class PrettyDict(Dict[_KT, _VT]):
    """Dict extension to support abbreviated __repr__"""

    def __repr__(self) -> str:
        return pprint_thing(self)