12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240 |
- """
- Internal module for formatting output data in csv, html, xml,
- and latex files. This module also applies to display formatting.
- """
- from __future__ import annotations
- from contextlib import contextmanager
- from csv import (
- QUOTE_NONE,
- QUOTE_NONNUMERIC,
- )
- from decimal import Decimal
- from functools import partial
- from io import StringIO
- import math
- import re
- from shutil import get_terminal_size
- from typing import (
- IO,
- TYPE_CHECKING,
- Any,
- Callable,
- Final,
- Generator,
- Hashable,
- Iterable,
- List,
- Mapping,
- Sequence,
- cast,
- )
- from unicodedata import east_asian_width
- import numpy as np
- from pandas._config.config import (
- get_option,
- set_option,
- )
- from pandas._libs import lib
- from pandas._libs.missing import NA
- from pandas._libs.tslibs import (
- NaT,
- Timedelta,
- Timestamp,
- get_unit_from_dtype,
- iNaT,
- periods_per_day,
- )
- from pandas._libs.tslibs.nattype import NaTType
- from pandas._typing import (
- ArrayLike,
- Axes,
- ColspaceArgType,
- ColspaceType,
- CompressionOptions,
- FilePath,
- FloatFormatType,
- FormattersType,
- IndexLabel,
- StorageOptions,
- WriteBuffer,
- )
- from pandas.core.dtypes.common import (
- is_categorical_dtype,
- is_complex_dtype,
- is_datetime64_dtype,
- is_extension_array_dtype,
- is_float,
- is_float_dtype,
- is_integer,
- is_integer_dtype,
- is_list_like,
- is_numeric_dtype,
- is_scalar,
- is_timedelta64_dtype,
- )
- from pandas.core.dtypes.dtypes import DatetimeTZDtype
- from pandas.core.dtypes.missing import (
- isna,
- notna,
- )
- from pandas.core.arrays import (
- Categorical,
- DatetimeArray,
- TimedeltaArray,
- )
- from pandas.core.arrays.string_ import StringDtype
- from pandas.core.base import PandasObject
- import pandas.core.common as com
- from pandas.core.construction import extract_array
- from pandas.core.indexes.api import (
- Index,
- MultiIndex,
- PeriodIndex,
- ensure_index,
- )
- from pandas.core.indexes.datetimes import DatetimeIndex
- from pandas.core.indexes.timedeltas import TimedeltaIndex
- from pandas.core.reshape.concat import concat
- from pandas.io.common import (
- check_parent_directory,
- stringify_path,
- )
- from pandas.io.formats import printing
- if TYPE_CHECKING:
- from pandas import (
- DataFrame,
- Series,
- )
- common_docstring: Final = """
- Parameters
- ----------
- buf : str, Path or StringIO-like, optional, default None
- Buffer to write to. If None, the output is returned as a string.
- columns : sequence, optional, default None
- The subset of columns to write. Writes all columns by default.
- col_space : %(col_space_type)s, optional
- %(col_space)s.
- header : %(header_type)s, optional
- %(header)s.
- index : bool, optional, default True
- Whether to print index (row) labels.
- na_rep : str, optional, default 'NaN'
- String representation of ``NaN`` to use.
- formatters : list, tuple or dict of one-param. functions, optional
- Formatter functions to apply to columns' elements by position or
- name.
- The result of each function must be a unicode string.
- List/tuple must be of length equal to the number of columns.
- float_format : one-parameter function, optional, default None
- Formatter function to apply to columns' elements if they are
- floats. This function must return a unicode string and will be
- applied only to the non-``NaN`` elements, with ``NaN`` being
- handled by ``na_rep``.
- .. versionchanged:: 1.2.0
- sparsify : bool, optional, default True
- Set to False for a DataFrame with a hierarchical index to print
- every multiindex key at each row.
- index_names : bool, optional, default True
- Prints the names of the indexes.
- justify : str, default None
- How to justify the column labels. If None uses the option from
- the print configuration (controlled by set_option), 'right' out
- of the box. Valid values are
- * left
- * right
- * center
- * justify
- * justify-all
- * start
- * end
- * inherit
- * match-parent
- * initial
- * unset.
- max_rows : int, optional
- Maximum number of rows to display in the console.
- max_cols : int, optional
- Maximum number of columns to display in the console.
- show_dimensions : bool, default False
- Display DataFrame dimensions (number of rows by number of columns).
- decimal : str, default '.'
- Character recognized as decimal separator, e.g. ',' in Europe.
- """
- _VALID_JUSTIFY_PARAMETERS = (
- "left",
- "right",
- "center",
- "justify",
- "justify-all",
- "start",
- "end",
- "inherit",
- "match-parent",
- "initial",
- "unset",
- )
- return_docstring: Final = """
- Returns
- -------
- str or None
- If buf is None, returns the result as a string. Otherwise returns
- None.
- """
- class CategoricalFormatter:
- def __init__(
- self,
- categorical: Categorical,
- buf: IO[str] | None = None,
- length: bool = True,
- na_rep: str = "NaN",
- footer: bool = True,
- ) -> None:
- self.categorical = categorical
- self.buf = buf if buf is not None else StringIO("")
- self.na_rep = na_rep
- self.length = length
- self.footer = footer
- self.quoting = QUOTE_NONNUMERIC
- def _get_footer(self) -> str:
- footer = ""
- if self.length:
- if footer:
- footer += ", "
- footer += f"Length: {len(self.categorical)}"
- level_info = self.categorical._repr_categories_info()
- # Levels are added in a newline
- if footer:
- footer += "\n"
- footer += level_info
- return str(footer)
- def _get_formatted_values(self) -> list[str]:
- return format_array(
- self.categorical._internal_get_values(),
- None,
- float_format=None,
- na_rep=self.na_rep,
- quoting=self.quoting,
- )
- def to_string(self) -> str:
- categorical = self.categorical
- if len(categorical) == 0:
- if self.footer:
- return self._get_footer()
- else:
- return ""
- fmt_values = self._get_formatted_values()
- fmt_values = [i.strip() for i in fmt_values]
- values = ", ".join(fmt_values)
- result = ["[" + values + "]"]
- if self.footer:
- footer = self._get_footer()
- if footer:
- result.append(footer)
- return str("\n".join(result))
- class SeriesFormatter:
- def __init__(
- self,
- series: Series,
- buf: IO[str] | None = None,
- length: bool | str = True,
- header: bool = True,
- index: bool = True,
- na_rep: str = "NaN",
- name: bool = False,
- float_format: str | None = None,
- dtype: bool = True,
- max_rows: int | None = None,
- min_rows: int | None = None,
- ) -> None:
- self.series = series
- self.buf = buf if buf is not None else StringIO()
- self.name = name
- self.na_rep = na_rep
- self.header = header
- self.length = length
- self.index = index
- self.max_rows = max_rows
- self.min_rows = min_rows
- if float_format is None:
- float_format = get_option("display.float_format")
- self.float_format = float_format
- self.dtype = dtype
- self.adj = get_adjustment()
- self._chk_truncate()
- def _chk_truncate(self) -> None:
- self.tr_row_num: int | None
- min_rows = self.min_rows
- max_rows = self.max_rows
- # truncation determined by max_rows, actual truncated number of rows
- # used below by min_rows
- is_truncated_vertically = max_rows and (len(self.series) > max_rows)
- series = self.series
- if is_truncated_vertically:
- max_rows = cast(int, max_rows)
- if min_rows:
- # if min_rows is set (not None or 0), set max_rows to minimum
- # of both
- max_rows = min(min_rows, max_rows)
- if max_rows == 1:
- row_num = max_rows
- series = series.iloc[:max_rows]
- else:
- row_num = max_rows // 2
- series = concat((series.iloc[:row_num], series.iloc[-row_num:]))
- self.tr_row_num = row_num
- else:
- self.tr_row_num = None
- self.tr_series = series
- self.is_truncated_vertically = is_truncated_vertically
- def _get_footer(self) -> str:
- name = self.series.name
- footer = ""
- if getattr(self.series.index, "freq", None) is not None:
- assert isinstance(
- self.series.index, (DatetimeIndex, PeriodIndex, TimedeltaIndex)
- )
- footer += f"Freq: {self.series.index.freqstr}"
- if self.name is not False and name is not None:
- if footer:
- footer += ", "
- series_name = printing.pprint_thing(name, escape_chars=("\t", "\r", "\n"))
- footer += f"Name: {series_name}"
- if self.length is True or (
- self.length == "truncate" and self.is_truncated_vertically
- ):
- if footer:
- footer += ", "
- footer += f"Length: {len(self.series)}"
- if self.dtype is not False and self.dtype is not None:
- dtype_name = getattr(self.tr_series.dtype, "name", None)
- if dtype_name:
- if footer:
- footer += ", "
- footer += f"dtype: {printing.pprint_thing(dtype_name)}"
- # level infos are added to the end and in a new line, like it is done
- # for Categoricals
- if is_categorical_dtype(self.tr_series.dtype):
- level_info = self.tr_series._values._repr_categories_info()
- if footer:
- footer += "\n"
- footer += level_info
- return str(footer)
- def _get_formatted_index(self) -> tuple[list[str], bool]:
- index = self.tr_series.index
- if isinstance(index, MultiIndex):
- have_header = any(name for name in index.names)
- fmt_index = index.format(names=True)
- else:
- have_header = index.name is not None
- fmt_index = index.format(name=True)
- return fmt_index, have_header
- def _get_formatted_values(self) -> list[str]:
- return format_array(
- self.tr_series._values,
- None,
- float_format=self.float_format,
- na_rep=self.na_rep,
- leading_space=self.index,
- )
- def to_string(self) -> str:
- series = self.tr_series
- footer = self._get_footer()
- if len(series) == 0:
- return f"{type(self.series).__name__}([], {footer})"
- fmt_index, have_header = self._get_formatted_index()
- fmt_values = self._get_formatted_values()
- if self.is_truncated_vertically:
- n_header_rows = 0
- row_num = self.tr_row_num
- row_num = cast(int, row_num)
- width = self.adj.len(fmt_values[row_num - 1])
- if width > 3:
- dot_str = "..."
- else:
- dot_str = ".."
- # Series uses mode=center because it has single value columns
- # DataFrame uses mode=left
- dot_str = self.adj.justify([dot_str], width, mode="center")[0]
- fmt_values.insert(row_num + n_header_rows, dot_str)
- fmt_index.insert(row_num + 1, "")
- if self.index:
- result = self.adj.adjoin(3, *[fmt_index[1:], fmt_values])
- else:
- result = self.adj.adjoin(3, fmt_values)
- if self.header and have_header:
- result = fmt_index[0] + "\n" + result
- if footer:
- result += "\n" + footer
- return str("".join(result))
- class TextAdjustment:
- def __init__(self) -> None:
- self.encoding = get_option("display.encoding")
- def len(self, text: str) -> int:
- return len(text)
- def justify(self, texts: Any, max_len: int, mode: str = "right") -> list[str]:
- return printing.justify(texts, max_len, mode=mode)
- def adjoin(self, space: int, *lists, **kwargs) -> str:
- return printing.adjoin(
- space, *lists, strlen=self.len, justfunc=self.justify, **kwargs
- )
- class EastAsianTextAdjustment(TextAdjustment):
- def __init__(self) -> None:
- super().__init__()
- if get_option("display.unicode.ambiguous_as_wide"):
- self.ambiguous_width = 2
- else:
- self.ambiguous_width = 1
- # Definition of East Asian Width
- # https://unicode.org/reports/tr11/
- # Ambiguous width can be changed by option
- self._EAW_MAP = {"Na": 1, "N": 1, "W": 2, "F": 2, "H": 1}
- def len(self, text: str) -> int:
- """
- Calculate display width considering unicode East Asian Width
- """
- if not isinstance(text, str):
- return len(text)
- return sum(
- self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) for c in text
- )
- def justify(
- self, texts: Iterable[str], max_len: int, mode: str = "right"
- ) -> list[str]:
- # re-calculate padding space per str considering East Asian Width
- def _get_pad(t):
- return max_len - self.len(t) + len(t)
- if mode == "left":
- return [x.ljust(_get_pad(x)) for x in texts]
- elif mode == "center":
- return [x.center(_get_pad(x)) for x in texts]
- else:
- return [x.rjust(_get_pad(x)) for x in texts]
- def get_adjustment() -> TextAdjustment:
- use_east_asian_width = get_option("display.unicode.east_asian_width")
- if use_east_asian_width:
- return EastAsianTextAdjustment()
- else:
- return TextAdjustment()
- def get_dataframe_repr_params() -> dict[str, Any]:
- """Get the parameters used to repr(dataFrame) calls using DataFrame.to_string.
- Supplying these parameters to DataFrame.to_string is equivalent to calling
- ``repr(DataFrame)``. This is useful if you want to adjust the repr output.
- .. versionadded:: 1.4.0
- Example
- -------
- >>> import pandas as pd
- >>>
- >>> df = pd.DataFrame([[1, 2], [3, 4]])
- >>> repr_params = pd.io.formats.format.get_dataframe_repr_params()
- >>> repr(df) == df.to_string(**repr_params)
- True
- """
- from pandas.io.formats import console
- if get_option("display.expand_frame_repr"):
- line_width, _ = console.get_console_size()
- else:
- line_width = None
- return {
- "max_rows": get_option("display.max_rows"),
- "min_rows": get_option("display.min_rows"),
- "max_cols": get_option("display.max_columns"),
- "max_colwidth": get_option("display.max_colwidth"),
- "show_dimensions": get_option("display.show_dimensions"),
- "line_width": line_width,
- }
- def get_series_repr_params() -> dict[str, Any]:
- """Get the parameters used to repr(Series) calls using Series.to_string.
- Supplying these parameters to Series.to_string is equivalent to calling
- ``repr(series)``. This is useful if you want to adjust the series repr output.
- .. versionadded:: 1.4.0
- Example
- -------
- >>> import pandas as pd
- >>>
- >>> ser = pd.Series([1, 2, 3, 4])
- >>> repr_params = pd.io.formats.format.get_series_repr_params()
- >>> repr(ser) == ser.to_string(**repr_params)
- True
- """
- width, height = get_terminal_size()
- max_rows = (
- height
- if get_option("display.max_rows") == 0
- else get_option("display.max_rows")
- )
- min_rows = (
- height
- if get_option("display.max_rows") == 0
- else get_option("display.min_rows")
- )
- return {
- "name": True,
- "dtype": True,
- "min_rows": min_rows,
- "max_rows": max_rows,
- "length": get_option("display.show_dimensions"),
- }
- class DataFrameFormatter:
- """Class for processing dataframe formatting options and data."""
- __doc__ = __doc__ if __doc__ else ""
- __doc__ += common_docstring + return_docstring
- def __init__(
- self,
- frame: DataFrame,
- columns: Sequence[Hashable] | None = None,
- col_space: ColspaceArgType | None = None,
- header: bool | Sequence[str] = True,
- index: bool = True,
- na_rep: str = "NaN",
- formatters: FormattersType | None = None,
- justify: str | None = None,
- float_format: FloatFormatType | None = None,
- sparsify: bool | None = None,
- index_names: bool = True,
- max_rows: int | None = None,
- min_rows: int | None = None,
- max_cols: int | None = None,
- show_dimensions: bool | str = False,
- decimal: str = ".",
- bold_rows: bool = False,
- escape: bool = True,
- ) -> None:
- self.frame = frame
- self.columns = self._initialize_columns(columns)
- self.col_space = self._initialize_colspace(col_space)
- self.header = header
- self.index = index
- self.na_rep = na_rep
- self.formatters = self._initialize_formatters(formatters)
- self.justify = self._initialize_justify(justify)
- self.float_format = float_format
- self.sparsify = self._initialize_sparsify(sparsify)
- self.show_index_names = index_names
- self.decimal = decimal
- self.bold_rows = bold_rows
- self.escape = escape
- self.max_rows = max_rows
- self.min_rows = min_rows
- self.max_cols = max_cols
- self.show_dimensions = show_dimensions
- self.max_cols_fitted = self._calc_max_cols_fitted()
- self.max_rows_fitted = self._calc_max_rows_fitted()
- self.tr_frame = self.frame
- self.truncate()
- self.adj = get_adjustment()
- def get_strcols(self) -> list[list[str]]:
- """
- Render a DataFrame to a list of columns (as lists of strings).
- """
- strcols = self._get_strcols_without_index()
- if self.index:
- str_index = self._get_formatted_index(self.tr_frame)
- strcols.insert(0, str_index)
- return strcols
- @property
- def should_show_dimensions(self) -> bool:
- return self.show_dimensions is True or (
- self.show_dimensions == "truncate" and self.is_truncated
- )
- @property
- def is_truncated(self) -> bool:
- return bool(self.is_truncated_horizontally or self.is_truncated_vertically)
- @property
- def is_truncated_horizontally(self) -> bool:
- return bool(self.max_cols_fitted and (len(self.columns) > self.max_cols_fitted))
- @property
- def is_truncated_vertically(self) -> bool:
- return bool(self.max_rows_fitted and (len(self.frame) > self.max_rows_fitted))
- @property
- def dimensions_info(self) -> str:
- return f"\n\n[{len(self.frame)} rows x {len(self.frame.columns)} columns]"
- @property
- def has_index_names(self) -> bool:
- return _has_names(self.frame.index)
- @property
- def has_column_names(self) -> bool:
- return _has_names(self.frame.columns)
- @property
- def show_row_idx_names(self) -> bool:
- return all((self.has_index_names, self.index, self.show_index_names))
- @property
- def show_col_idx_names(self) -> bool:
- return all((self.has_column_names, self.show_index_names, self.header))
- @property
- def max_rows_displayed(self) -> int:
- return min(self.max_rows or len(self.frame), len(self.frame))
- def _initialize_sparsify(self, sparsify: bool | None) -> bool:
- if sparsify is None:
- return get_option("display.multi_sparse")
- return sparsify
- def _initialize_formatters(
- self, formatters: FormattersType | None
- ) -> FormattersType:
- if formatters is None:
- return {}
- elif len(self.frame.columns) == len(formatters) or isinstance(formatters, dict):
- return formatters
- else:
- raise ValueError(
- f"Formatters length({len(formatters)}) should match "
- f"DataFrame number of columns({len(self.frame.columns)})"
- )
- def _initialize_justify(self, justify: str | None) -> str:
- if justify is None:
- return get_option("display.colheader_justify")
- else:
- return justify
- def _initialize_columns(self, columns: Sequence[Hashable] | None) -> Index:
- if columns is not None:
- # GH 47231 - columns doesn't have to be `Sequence[str]`
- # Will fix in later PR
- cols = ensure_index(cast(Axes, columns))
- self.frame = self.frame[cols]
- return cols
- else:
- return self.frame.columns
- def _initialize_colspace(self, col_space: ColspaceArgType | None) -> ColspaceType:
- result: ColspaceType
- if col_space is None:
- result = {}
- elif isinstance(col_space, (int, str)):
- result = {"": col_space}
- result.update({column: col_space for column in self.frame.columns})
- elif isinstance(col_space, Mapping):
- for column in col_space.keys():
- if column not in self.frame.columns and column != "":
- raise ValueError(
- f"Col_space is defined for an unknown column: {column}"
- )
- result = col_space
- else:
- if len(self.frame.columns) != len(col_space):
- raise ValueError(
- f"Col_space length({len(col_space)}) should match "
- f"DataFrame number of columns({len(self.frame.columns)})"
- )
- result = dict(zip(self.frame.columns, col_space))
- return result
- def _calc_max_cols_fitted(self) -> int | None:
- """Number of columns fitting the screen."""
- if not self._is_in_terminal():
- return self.max_cols
- width, _ = get_terminal_size()
- if self._is_screen_narrow(width):
- return width
- else:
- return self.max_cols
- def _calc_max_rows_fitted(self) -> int | None:
- """Number of rows with data fitting the screen."""
- max_rows: int | None
- if self._is_in_terminal():
- _, height = get_terminal_size()
- if self.max_rows == 0:
- # rows available to fill with actual data
- return height - self._get_number_of_auxillary_rows()
- if self._is_screen_short(height):
- max_rows = height
- else:
- max_rows = self.max_rows
- else:
- max_rows = self.max_rows
- return self._adjust_max_rows(max_rows)
- def _adjust_max_rows(self, max_rows: int | None) -> int | None:
- """Adjust max_rows using display logic.
- See description here:
- https://pandas.pydata.org/docs/dev/user_guide/options.html#frequently-used-options
- GH #37359
- """
- if max_rows:
- if (len(self.frame) > max_rows) and self.min_rows:
- # if truncated, set max_rows showed to min_rows
- max_rows = min(self.min_rows, max_rows)
- return max_rows
- def _is_in_terminal(self) -> bool:
- """Check if the output is to be shown in terminal."""
- return bool(self.max_cols == 0 or self.max_rows == 0)
- def _is_screen_narrow(self, max_width) -> bool:
- return bool(self.max_cols == 0 and len(self.frame.columns) > max_width)
- def _is_screen_short(self, max_height) -> bool:
- return bool(self.max_rows == 0 and len(self.frame) > max_height)
- def _get_number_of_auxillary_rows(self) -> int:
- """Get number of rows occupied by prompt, dots and dimension info."""
- dot_row = 1
- prompt_row = 1
- num_rows = dot_row + prompt_row
- if self.show_dimensions:
- num_rows += len(self.dimensions_info.splitlines())
- if self.header:
- num_rows += 1
- return num_rows
- def truncate(self) -> None:
- """
- Check whether the frame should be truncated. If so, slice the frame up.
- """
- if self.is_truncated_horizontally:
- self._truncate_horizontally()
- if self.is_truncated_vertically:
- self._truncate_vertically()
- def _truncate_horizontally(self) -> None:
- """Remove columns, which are not to be displayed and adjust formatters.
- Attributes affected:
- - tr_frame
- - formatters
- - tr_col_num
- """
- assert self.max_cols_fitted is not None
- col_num = self.max_cols_fitted // 2
- if col_num >= 1:
- left = self.tr_frame.iloc[:, :col_num]
- right = self.tr_frame.iloc[:, -col_num:]
- self.tr_frame = concat((left, right), axis=1)
- # truncate formatter
- if isinstance(self.formatters, (list, tuple)):
- self.formatters = [
- *self.formatters[:col_num],
- *self.formatters[-col_num:],
- ]
- else:
- col_num = cast(int, self.max_cols)
- self.tr_frame = self.tr_frame.iloc[:, :col_num]
- self.tr_col_num = col_num
- def _truncate_vertically(self) -> None:
- """Remove rows, which are not to be displayed.
- Attributes affected:
- - tr_frame
- - tr_row_num
- """
- assert self.max_rows_fitted is not None
- row_num = self.max_rows_fitted // 2
- if row_num >= 1:
- head = self.tr_frame.iloc[:row_num, :]
- tail = self.tr_frame.iloc[-row_num:, :]
- self.tr_frame = concat((head, tail))
- else:
- row_num = cast(int, self.max_rows)
- self.tr_frame = self.tr_frame.iloc[:row_num, :]
- self.tr_row_num = row_num
- def _get_strcols_without_index(self) -> list[list[str]]:
- strcols: list[list[str]] = []
- if not is_list_like(self.header) and not self.header:
- for i, c in enumerate(self.tr_frame):
- fmt_values = self.format_col(i)
- fmt_values = _make_fixed_width(
- strings=fmt_values,
- justify=self.justify,
- minimum=int(self.col_space.get(c, 0)),
- adj=self.adj,
- )
- strcols.append(fmt_values)
- return strcols
- if is_list_like(self.header):
- # cast here since can't be bool if is_list_like
- self.header = cast(List[str], self.header)
- if len(self.header) != len(self.columns):
- raise ValueError(
- f"Writing {len(self.columns)} cols "
- f"but got {len(self.header)} aliases"
- )
- str_columns = [[label] for label in self.header]
- else:
- str_columns = self._get_formatted_column_labels(self.tr_frame)
- if self.show_row_idx_names:
- for x in str_columns:
- x.append("")
- for i, c in enumerate(self.tr_frame):
- cheader = str_columns[i]
- header_colwidth = max(
- int(self.col_space.get(c, 0)), *(self.adj.len(x) for x in cheader)
- )
- fmt_values = self.format_col(i)
- fmt_values = _make_fixed_width(
- fmt_values, self.justify, minimum=header_colwidth, adj=self.adj
- )
- max_len = max(max(self.adj.len(x) for x in fmt_values), header_colwidth)
- cheader = self.adj.justify(cheader, max_len, mode=self.justify)
- strcols.append(cheader + fmt_values)
- return strcols
- def format_col(self, i: int) -> list[str]:
- frame = self.tr_frame
- formatter = self._get_formatter(i)
- return format_array(
- frame.iloc[:, i]._values,
- formatter,
- float_format=self.float_format,
- na_rep=self.na_rep,
- space=self.col_space.get(frame.columns[i]),
- decimal=self.decimal,
- leading_space=self.index,
- )
- def _get_formatter(self, i: str | int) -> Callable | None:
- if isinstance(self.formatters, (list, tuple)):
- if is_integer(i):
- i = cast(int, i)
- return self.formatters[i]
- else:
- return None
- else:
- if is_integer(i) and i not in self.columns:
- i = self.columns[i]
- return self.formatters.get(i, None)
- def _get_formatted_column_labels(self, frame: DataFrame) -> list[list[str]]:
- from pandas.core.indexes.multi import sparsify_labels
- columns = frame.columns
- if isinstance(columns, MultiIndex):
- fmt_columns = columns.format(sparsify=False, adjoin=False)
- fmt_columns = list(zip(*fmt_columns))
- dtypes = self.frame.dtypes._values
- # if we have a Float level, they don't use leading space at all
- restrict_formatting = any(level.is_floating for level in columns.levels)
- need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
- def space_format(x, y):
- if (
- y not in self.formatters
- and need_leadsp[x]
- and not restrict_formatting
- ):
- return " " + y
- return y
- str_columns = list(
- zip(*([space_format(x, y) for y in x] for x in fmt_columns))
- )
- if self.sparsify and len(str_columns):
- str_columns = sparsify_labels(str_columns)
- str_columns = [list(x) for x in zip(*str_columns)]
- else:
- fmt_columns = columns.format()
- dtypes = self.frame.dtypes
- need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes)))
- str_columns = [
- [" " + x if not self._get_formatter(i) and need_leadsp[x] else x]
- for i, x in enumerate(fmt_columns)
- ]
- # self.str_columns = str_columns
- return str_columns
- def _get_formatted_index(self, frame: DataFrame) -> list[str]:
- # Note: this is only used by to_string() and to_latex(), not by
- # to_html(). so safe to cast col_space here.
- col_space = {k: cast(int, v) for k, v in self.col_space.items()}
- index = frame.index
- columns = frame.columns
- fmt = self._get_formatter("__index__")
- if isinstance(index, MultiIndex):
- fmt_index = index.format(
- sparsify=self.sparsify,
- adjoin=False,
- names=self.show_row_idx_names,
- formatter=fmt,
- )
- else:
- fmt_index = [index.format(name=self.show_row_idx_names, formatter=fmt)]
- fmt_index = [
- tuple(
- _make_fixed_width(
- list(x), justify="left", minimum=col_space.get("", 0), adj=self.adj
- )
- )
- for x in fmt_index
- ]
- adjoined = self.adj.adjoin(1, *fmt_index).split("\n")
- # empty space for columns
- if self.show_col_idx_names:
- col_header = [str(x) for x in self._get_column_name_list()]
- else:
- col_header = [""] * columns.nlevels
- if self.header:
- return col_header + adjoined
- else:
- return adjoined
- def _get_column_name_list(self) -> list[Hashable]:
- names: list[Hashable] = []
- columns = self.frame.columns
- if isinstance(columns, MultiIndex):
- names.extend("" if name is None else name for name in columns.names)
- else:
- names.append("" if columns.name is None else columns.name)
- return names
- class DataFrameRenderer:
- """Class for creating dataframe output in multiple formats.
- Called in pandas.core.generic.NDFrame:
- - to_csv
- - to_latex
- Called in pandas.core.frame.DataFrame:
- - to_html
- - to_string
- Parameters
- ----------
- fmt : DataFrameFormatter
- Formatter with the formatting options.
- """
- def __init__(self, fmt: DataFrameFormatter) -> None:
- self.fmt = fmt
- def to_latex(
- self,
- buf: FilePath | WriteBuffer[str] | None = None,
- column_format: str | None = None,
- longtable: bool = False,
- encoding: str | None = None,
- multicolumn: bool = False,
- multicolumn_format: str | None = None,
- multirow: bool = False,
- caption: str | tuple[str, str] | None = None,
- label: str | None = None,
- position: str | None = None,
- ) -> str | None:
- """
- Render a DataFrame to a LaTeX tabular/longtable environment output.
- """
- from pandas.io.formats.latex import LatexFormatter
- latex_formatter = LatexFormatter(
- self.fmt,
- longtable=longtable,
- column_format=column_format,
- multicolumn=multicolumn,
- multicolumn_format=multicolumn_format,
- multirow=multirow,
- caption=caption,
- label=label,
- position=position,
- )
- string = latex_formatter.to_string()
- return save_to_buffer(string, buf=buf, encoding=encoding)
- def to_html(
- self,
- buf: FilePath | WriteBuffer[str] | None = None,
- encoding: str | None = None,
- classes: str | list | tuple | None = None,
- notebook: bool = False,
- border: int | bool | None = None,
- table_id: str | None = None,
- render_links: bool = False,
- ) -> str | None:
- """
- Render a DataFrame to a html table.
- Parameters
- ----------
- buf : str, path object, file-like object, or None, default None
- String, path object (implementing ``os.PathLike[str]``), or file-like
- object implementing a string ``write()`` function. If None, the result is
- returned as a string.
- encoding : str, default “utf-8”
- Set character encoding.
- classes : str or list-like
- classes to include in the `class` attribute of the opening
- ``<table>`` tag, in addition to the default "dataframe".
- notebook : {True, False}, optional, default False
- Whether the generated HTML is for IPython Notebook.
- border : int
- A ``border=border`` attribute is included in the opening
- ``<table>`` tag. Default ``pd.options.display.html.border``.
- table_id : str, optional
- A css id is included in the opening `<table>` tag if specified.
- render_links : bool, default False
- Convert URLs to HTML links.
- """
- from pandas.io.formats.html import (
- HTMLFormatter,
- NotebookFormatter,
- )
- Klass = NotebookFormatter if notebook else HTMLFormatter
- html_formatter = Klass(
- self.fmt,
- classes=classes,
- border=border,
- table_id=table_id,
- render_links=render_links,
- )
- string = html_formatter.to_string()
- return save_to_buffer(string, buf=buf, encoding=encoding)
- def to_string(
- self,
- buf: FilePath | WriteBuffer[str] | None = None,
- encoding: str | None = None,
- line_width: int | None = None,
- ) -> str | None:
- """
- Render a DataFrame to a console-friendly tabular output.
- Parameters
- ----------
- buf : str, path object, file-like object, or None, default None
- String, path object (implementing ``os.PathLike[str]``), or file-like
- object implementing a string ``write()`` function. If None, the result is
- returned as a string.
- encoding: str, default “utf-8”
- Set character encoding.
- line_width : int, optional
- Width to wrap a line in characters.
- """
- from pandas.io.formats.string import StringFormatter
- string_formatter = StringFormatter(self.fmt, line_width=line_width)
- string = string_formatter.to_string()
- return save_to_buffer(string, buf=buf, encoding=encoding)
- def to_csv(
- self,
- path_or_buf: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None,
- encoding: str | None = None,
- sep: str = ",",
- columns: Sequence[Hashable] | None = None,
- index_label: IndexLabel | None = None,
- mode: str = "w",
- compression: CompressionOptions = "infer",
- quoting: int | None = None,
- quotechar: str = '"',
- lineterminator: str | None = None,
- chunksize: int | None = None,
- date_format: str | None = None,
- doublequote: bool = True,
- escapechar: str | None = None,
- errors: str = "strict",
- storage_options: StorageOptions = None,
- ) -> str | None:
- """
- Render dataframe as comma-separated file.
- """
- from pandas.io.formats.csvs import CSVFormatter
- if path_or_buf is None:
- created_buffer = True
- path_or_buf = StringIO()
- else:
- created_buffer = False
- csv_formatter = CSVFormatter(
- path_or_buf=path_or_buf,
- lineterminator=lineterminator,
- sep=sep,
- encoding=encoding,
- errors=errors,
- compression=compression,
- quoting=quoting,
- cols=columns,
- index_label=index_label,
- mode=mode,
- chunksize=chunksize,
- quotechar=quotechar,
- date_format=date_format,
- doublequote=doublequote,
- escapechar=escapechar,
- storage_options=storage_options,
- formatter=self.fmt,
- )
- csv_formatter.save()
- if created_buffer:
- assert isinstance(path_or_buf, StringIO)
- content = path_or_buf.getvalue()
- path_or_buf.close()
- return content
- return None
- def save_to_buffer(
- string: str,
- buf: FilePath | WriteBuffer[str] | None = None,
- encoding: str | None = None,
- ) -> str | None:
- """
- Perform serialization. Write to buf or return as string if buf is None.
- """
- with get_buffer(buf, encoding=encoding) as f:
- f.write(string)
- if buf is None:
- # error: "WriteBuffer[str]" has no attribute "getvalue"
- return f.getvalue() # type: ignore[attr-defined]
- return None
- @contextmanager
- def get_buffer(
- buf: FilePath | WriteBuffer[str] | None, encoding: str | None = None
- ) -> Generator[WriteBuffer[str], None, None] | Generator[StringIO, None, None]:
- """
- Context manager to open, yield and close buffer for filenames or Path-like
- objects, otherwise yield buf unchanged.
- """
- if buf is not None:
- buf = stringify_path(buf)
- else:
- buf = StringIO()
- if encoding is None:
- encoding = "utf-8"
- elif not isinstance(buf, str):
- raise ValueError("buf is not a file name and encoding is specified.")
- if hasattr(buf, "write"):
- # Incompatible types in "yield" (actual type "Union[str, WriteBuffer[str],
- # StringIO]", expected type "Union[WriteBuffer[str], StringIO]")
- yield buf # type: ignore[misc]
- elif isinstance(buf, str):
- check_parent_directory(str(buf))
- with open(buf, "w", encoding=encoding, newline="") as f:
- # GH#30034 open instead of codecs.open prevents a file leak
- # if we have an invalid encoding argument.
- # newline="" is needed to roundtrip correctly on
- # windows test_to_latex_filename
- yield f
- else:
- raise TypeError("buf is not a file name and it has no write method")
- # ----------------------------------------------------------------------
- # Array formatters
- def format_array(
- values: Any,
- formatter: Callable | None,
- float_format: FloatFormatType | None = None,
- na_rep: str = "NaN",
- digits: int | None = None,
- space: str | int | None = None,
- justify: str = "right",
- decimal: str = ".",
- leading_space: bool | None = True,
- quoting: int | None = None,
- fallback_formatter: Callable | None = None,
- ) -> list[str]:
- """
- Format an array for printing.
- Parameters
- ----------
- values
- formatter
- float_format
- na_rep
- digits
- space
- justify
- decimal
- leading_space : bool, optional, default True
- Whether the array should be formatted with a leading space.
- When an array as a column of a Series or DataFrame, we do want
- the leading space to pad between columns.
- When formatting an Index subclass
- (e.g. IntervalIndex._format_native_types), we don't want the
- leading space since it should be left-aligned.
- fallback_formatter
- Returns
- -------
- List[str]
- """
- fmt_klass: type[GenericArrayFormatter]
- if is_datetime64_dtype(values.dtype):
- fmt_klass = Datetime64Formatter
- elif isinstance(values.dtype, DatetimeTZDtype):
- fmt_klass = Datetime64TZFormatter
- elif is_timedelta64_dtype(values.dtype):
- fmt_klass = Timedelta64Formatter
- elif is_extension_array_dtype(values.dtype):
- fmt_klass = ExtensionArrayFormatter
- elif is_float_dtype(values.dtype) or is_complex_dtype(values.dtype):
- fmt_klass = FloatArrayFormatter
- elif is_integer_dtype(values.dtype):
- fmt_klass = IntArrayFormatter
- else:
- fmt_klass = GenericArrayFormatter
- if space is None:
- space = 12
- if float_format is None:
- float_format = get_option("display.float_format")
- if digits is None:
- digits = get_option("display.precision")
- fmt_obj = fmt_klass(
- values,
- digits=digits,
- na_rep=na_rep,
- float_format=float_format,
- formatter=formatter,
- space=space,
- justify=justify,
- decimal=decimal,
- leading_space=leading_space,
- quoting=quoting,
- fallback_formatter=fallback_formatter,
- )
- return fmt_obj.get_result()
- class GenericArrayFormatter:
- def __init__(
- self,
- values: Any,
- digits: int = 7,
- formatter: Callable | None = None,
- na_rep: str = "NaN",
- space: str | int = 12,
- float_format: FloatFormatType | None = None,
- justify: str = "right",
- decimal: str = ".",
- quoting: int | None = None,
- fixed_width: bool = True,
- leading_space: bool | None = True,
- fallback_formatter: Callable | None = None,
- ) -> None:
- self.values = values
- self.digits = digits
- self.na_rep = na_rep
- self.space = space
- self.formatter = formatter
- self.float_format = float_format
- self.justify = justify
- self.decimal = decimal
- self.quoting = quoting
- self.fixed_width = fixed_width
- self.leading_space = leading_space
- self.fallback_formatter = fallback_formatter
- def get_result(self) -> list[str]:
- fmt_values = self._format_strings()
- return _make_fixed_width(fmt_values, self.justify)
- def _format_strings(self) -> list[str]:
- if self.float_format is None:
- float_format = get_option("display.float_format")
- if float_format is None:
- precision = get_option("display.precision")
- float_format = lambda x: _trim_zeros_single_float(
- f"{x: .{precision:d}f}"
- )
- else:
- float_format = self.float_format
- if self.formatter is not None:
- formatter = self.formatter
- elif self.fallback_formatter is not None:
- formatter = self.fallback_formatter
- else:
- quote_strings = self.quoting is not None and self.quoting != QUOTE_NONE
- formatter = partial(
- printing.pprint_thing,
- escape_chars=("\t", "\r", "\n"),
- quote_strings=quote_strings,
- )
- def _format(x):
- if self.na_rep is not None and is_scalar(x) and isna(x):
- try:
- # try block for np.isnat specifically
- # determine na_rep if x is None or NaT-like
- if x is None:
- return "None"
- elif x is NA:
- return str(NA)
- elif x is NaT or np.isnat(x):
- return "NaT"
- except (TypeError, ValueError):
- # np.isnat only handles datetime or timedelta objects
- pass
- return self.na_rep
- elif isinstance(x, PandasObject):
- return str(x)
- elif isinstance(x, StringDtype):
- return repr(x)
- else:
- # object dtype
- return str(formatter(x))
- vals = extract_array(self.values, extract_numpy=True)
- if not isinstance(vals, np.ndarray):
- raise TypeError(
- "ExtensionArray formatting should use ExtensionArrayFormatter"
- )
- inferred = lib.map_infer(vals, is_float)
- is_float_type = (
- inferred
- # vals may have 2 or more dimensions
- & np.all(notna(vals), axis=tuple(range(1, len(vals.shape))))
- )
- leading_space = self.leading_space
- if leading_space is None:
- leading_space = is_float_type.any()
- fmt_values = []
- for i, v in enumerate(vals):
- if (not is_float_type[i] or self.formatter is not None) and leading_space:
- fmt_values.append(f" {_format(v)}")
- elif is_float_type[i]:
- fmt_values.append(float_format(v))
- else:
- if leading_space is False:
- # False specifically, so that the default is
- # to include a space if we get here.
- tpl = "{v}"
- else:
- tpl = " {v}"
- fmt_values.append(tpl.format(v=_format(v)))
- return fmt_values
- class FloatArrayFormatter(GenericArrayFormatter):
- def __init__(self, *args, **kwargs) -> None:
- super().__init__(*args, **kwargs)
- # float_format is expected to be a string
- # formatter should be used to pass a function
- if self.float_format is not None and self.formatter is None:
- # GH21625, GH22270
- self.fixed_width = False
- if callable(self.float_format):
- self.formatter = self.float_format
- self.float_format = None
- def _value_formatter(
- self,
- float_format: FloatFormatType | None = None,
- threshold: float | None = None,
- ) -> Callable:
- """Returns a function to be applied on each value to format it"""
- # the float_format parameter supersedes self.float_format
- if float_format is None:
- float_format = self.float_format
- # we are going to compose different functions, to first convert to
- # a string, then replace the decimal symbol, and finally chop according
- # to the threshold
- # when there is no float_format, we use str instead of '%g'
- # because str(0.0) = '0.0' while '%g' % 0.0 = '0'
- if float_format:
- def base_formatter(v):
- assert float_format is not None # for mypy
- # error: "str" not callable
- # error: Unexpected keyword argument "value" for "__call__" of
- # "EngFormatter"
- return (
- float_format(value=v) # type: ignore[operator,call-arg]
- if notna(v)
- else self.na_rep
- )
- else:
- def base_formatter(v):
- return str(v) if notna(v) else self.na_rep
- if self.decimal != ".":
- def decimal_formatter(v):
- return base_formatter(v).replace(".", self.decimal, 1)
- else:
- decimal_formatter = base_formatter
- if threshold is None:
- return decimal_formatter
- def formatter(value):
- if notna(value):
- if abs(value) > threshold:
- return decimal_formatter(value)
- else:
- return decimal_formatter(0.0)
- else:
- return self.na_rep
- return formatter
- def get_result_as_array(self) -> np.ndarray:
- """
- Returns the float values converted into strings using
- the parameters given at initialisation, as a numpy array
- """
- def format_with_na_rep(values: ArrayLike, formatter: Callable, na_rep: str):
- mask = isna(values)
- formatted = np.array(
- [
- formatter(val) if not m else na_rep
- for val, m in zip(values.ravel(), mask.ravel())
- ]
- ).reshape(values.shape)
- return formatted
- if self.formatter is not None:
- return format_with_na_rep(self.values, self.formatter, self.na_rep)
- if self.fixed_width:
- threshold = get_option("display.chop_threshold")
- else:
- threshold = None
- # if we have a fixed_width, we'll need to try different float_format
- def format_values_with(float_format):
- formatter = self._value_formatter(float_format, threshold)
- # default formatter leaves a space to the left when formatting
- # floats, must be consistent for left-justifying NaNs (GH #25061)
- if self.justify == "left":
- na_rep = " " + self.na_rep
- else:
- na_rep = self.na_rep
- # separate the wheat from the chaff
- values = self.values
- is_complex = is_complex_dtype(values)
- values = format_with_na_rep(values, formatter, na_rep)
- if self.fixed_width:
- if is_complex:
- result = _trim_zeros_complex(values, self.decimal)
- else:
- result = _trim_zeros_float(values, self.decimal)
- return np.asarray(result, dtype="object")
- return values
- # There is a special default string when we are fixed-width
- # The default is otherwise to use str instead of a formatting string
- float_format: FloatFormatType | None
- if self.float_format is None:
- if self.fixed_width:
- if self.leading_space is True:
- fmt_str = "{value: .{digits:d}f}"
- else:
- fmt_str = "{value:.{digits:d}f}"
- float_format = partial(fmt_str.format, digits=self.digits)
- else:
- float_format = self.float_format
- else:
- float_format = lambda value: self.float_format % value
- formatted_values = format_values_with(float_format)
- if not self.fixed_width:
- return formatted_values
- # we need do convert to engineering format if some values are too small
- # and would appear as 0, or if some values are too big and take too
- # much space
- if len(formatted_values) > 0:
- maxlen = max(len(x) for x in formatted_values)
- too_long = maxlen > self.digits + 6
- else:
- too_long = False
- with np.errstate(invalid="ignore"):
- abs_vals = np.abs(self.values)
- # this is pretty arbitrary for now
- # large values: more that 8 characters including decimal symbol
- # and first digit, hence > 1e6
- has_large_values = (abs_vals > 1e6).any()
- has_small_values = (
- (abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)
- ).any()
- if has_small_values or (too_long and has_large_values):
- if self.leading_space is True:
- fmt_str = "{value: .{digits:d}e}"
- else:
- fmt_str = "{value:.{digits:d}e}"
- float_format = partial(fmt_str.format, digits=self.digits)
- formatted_values = format_values_with(float_format)
- return formatted_values
- def _format_strings(self) -> list[str]:
- return list(self.get_result_as_array())
- class IntArrayFormatter(GenericArrayFormatter):
- def _format_strings(self) -> list[str]:
- if self.leading_space is False:
- formatter_str = lambda x: f"{x:d}".format(x=x)
- else:
- formatter_str = lambda x: f"{x: d}".format(x=x)
- formatter = self.formatter or formatter_str
- fmt_values = [formatter(x) for x in self.values]
- return fmt_values
- class Datetime64Formatter(GenericArrayFormatter):
- def __init__(
- self,
- values: np.ndarray | Series | DatetimeIndex | DatetimeArray,
- nat_rep: str = "NaT",
- date_format: None = None,
- **kwargs,
- ) -> None:
- super().__init__(values, **kwargs)
- self.nat_rep = nat_rep
- self.date_format = date_format
- def _format_strings(self) -> list[str]:
- """we by definition have DO NOT have a TZ"""
- values = self.values
- if not isinstance(values, DatetimeIndex):
- values = DatetimeIndex(values)
- if self.formatter is not None and callable(self.formatter):
- return [self.formatter(x) for x in values]
- fmt_values = values._data._format_native_types(
- na_rep=self.nat_rep, date_format=self.date_format
- )
- return fmt_values.tolist()
- class ExtensionArrayFormatter(GenericArrayFormatter):
- def _format_strings(self) -> list[str]:
- values = extract_array(self.values, extract_numpy=True)
- formatter = self.formatter
- fallback_formatter = None
- if formatter is None:
- fallback_formatter = values._formatter(boxed=True)
- if isinstance(values, Categorical):
- # Categorical is special for now, so that we can preserve tzinfo
- array = values._internal_get_values()
- else:
- array = np.asarray(values)
- fmt_values = format_array(
- array,
- formatter,
- float_format=self.float_format,
- na_rep=self.na_rep,
- digits=self.digits,
- space=self.space,
- justify=self.justify,
- decimal=self.decimal,
- leading_space=self.leading_space,
- quoting=self.quoting,
- fallback_formatter=fallback_formatter,
- )
- return fmt_values
- def format_percentiles(
- percentiles: (np.ndarray | Sequence[float]),
- ) -> list[str]:
- """
- Outputs rounded and formatted percentiles.
- Parameters
- ----------
- percentiles : list-like, containing floats from interval [0,1]
- Returns
- -------
- formatted : list of strings
- Notes
- -----
- Rounding precision is chosen so that: (1) if any two elements of
- ``percentiles`` differ, they remain different after rounding
- (2) no entry is *rounded* to 0% or 100%.
- Any non-integer is always rounded to at least 1 decimal place.
- Examples
- --------
- Keeps all entries different after rounding:
- >>> format_percentiles([0.01999, 0.02001, 0.5, 0.666666, 0.9999])
- ['1.999%', '2.001%', '50%', '66.667%', '99.99%']
- No element is rounded to 0% or 100% (unless already equal to it).
- Duplicates are allowed:
- >>> format_percentiles([0, 0.5, 0.02001, 0.5, 0.666666, 0.9999])
- ['0%', '50%', '2.0%', '50%', '66.67%', '99.99%']
- """
- percentiles = np.asarray(percentiles)
- # It checks for np.NaN as well
- with np.errstate(invalid="ignore"):
- if (
- not is_numeric_dtype(percentiles)
- or not np.all(percentiles >= 0)
- or not np.all(percentiles <= 1)
- ):
- raise ValueError("percentiles should all be in the interval [0,1]")
- percentiles = 100 * percentiles
- percentiles_round_type = percentiles.round().astype(int)
- int_idx = np.isclose(percentiles_round_type, percentiles)
- if np.all(int_idx):
- out = percentiles_round_type.astype(str)
- return [i + "%" for i in out]
- unique_pcts = np.unique(percentiles)
- to_begin = unique_pcts[0] if unique_pcts[0] > 0 else None
- to_end = 100 - unique_pcts[-1] if unique_pcts[-1] < 100 else None
- # Least precision that keeps percentiles unique after rounding
- prec = -np.floor(
- np.log10(np.min(np.ediff1d(unique_pcts, to_begin=to_begin, to_end=to_end)))
- ).astype(int)
- prec = max(1, prec)
- out = np.empty_like(percentiles, dtype=object)
- out[int_idx] = percentiles[int_idx].round().astype(int).astype(str)
- out[~int_idx] = percentiles[~int_idx].round(prec).astype(str)
- return [i + "%" for i in out]
- def is_dates_only(values: np.ndarray | DatetimeArray | Index | DatetimeIndex) -> bool:
- # return a boolean if we are only dates (and don't have a timezone)
- if not isinstance(values, Index):
- values = values.ravel()
- if not isinstance(values, (DatetimeArray, DatetimeIndex)):
- values = DatetimeIndex(values)
- if values.tz is not None:
- return False
- values_int = values.asi8
- consider_values = values_int != iNaT
- # error: Argument 1 to "py_get_unit_from_dtype" has incompatible type
- # "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
- reso = get_unit_from_dtype(values.dtype) # type: ignore[arg-type]
- ppd = periods_per_day(reso)
- # TODO: can we reuse is_date_array_normalized? would need a skipna kwd
- even_days = np.logical_and(consider_values, values_int % ppd != 0).sum() == 0
- if even_days:
- return True
- return False
- def _format_datetime64(x: NaTType | Timestamp, nat_rep: str = "NaT") -> str:
- if x is NaT:
- return nat_rep
- # Timestamp.__str__ falls back to datetime.datetime.__str__ = isoformat(sep=' ')
- # so it already uses string formatting rather than strftime (faster).
- return str(x)
- def _format_datetime64_dateonly(
- x: NaTType | Timestamp,
- nat_rep: str = "NaT",
- date_format: str | None = None,
- ) -> str:
- if isinstance(x, NaTType):
- return nat_rep
- if date_format:
- return x.strftime(date_format)
- else:
- # Timestamp._date_repr relies on string formatting (faster than strftime)
- return x._date_repr
- def get_format_datetime64(
- is_dates_only_: bool, nat_rep: str = "NaT", date_format: str | None = None
- ) -> Callable:
- """Return a formatter callable taking a datetime64 as input and providing
- a string as output"""
- if is_dates_only_:
- return lambda x: _format_datetime64_dateonly(
- x, nat_rep=nat_rep, date_format=date_format
- )
- else:
- return lambda x: _format_datetime64(x, nat_rep=nat_rep)
- def get_format_datetime64_from_values(
- values: np.ndarray | DatetimeArray | DatetimeIndex, date_format: str | None
- ) -> str | None:
- """given values and a date_format, return a string format"""
- if isinstance(values, np.ndarray) and values.ndim > 1:
- # We don't actually care about the order of values, and DatetimeIndex
- # only accepts 1D values
- values = values.ravel()
- ido = is_dates_only(values)
- if ido:
- # Only dates and no timezone: provide a default format
- return date_format or "%Y-%m-%d"
- return date_format
- class Datetime64TZFormatter(Datetime64Formatter):
- def _format_strings(self) -> list[str]:
- """we by definition have a TZ"""
- values = self.values.astype(object)
- ido = is_dates_only(values)
- formatter = self.formatter or get_format_datetime64(
- ido, date_format=self.date_format
- )
- fmt_values = [formatter(x) for x in values]
- return fmt_values
- class Timedelta64Formatter(GenericArrayFormatter):
- def __init__(
- self,
- values: np.ndarray | TimedeltaIndex,
- nat_rep: str = "NaT",
- box: bool = False,
- **kwargs,
- ) -> None:
- super().__init__(values, **kwargs)
- self.nat_rep = nat_rep
- self.box = box
- def _format_strings(self) -> list[str]:
- formatter = self.formatter or get_format_timedelta64(
- self.values, nat_rep=self.nat_rep, box=self.box
- )
- return [formatter(x) for x in self.values]
- def get_format_timedelta64(
- values: np.ndarray | TimedeltaIndex | TimedeltaArray,
- nat_rep: str | float = "NaT",
- box: bool = False,
- ) -> Callable:
- """
- Return a formatter function for a range of timedeltas.
- These will all have the same format argument
- If box, then show the return in quotes
- """
- values_int = values.view(np.int64)
- consider_values = values_int != iNaT
- one_day_nanos = 86400 * 10**9
- # error: Unsupported operand types for % ("ExtensionArray" and "int")
- not_midnight = values_int % one_day_nanos != 0 # type: ignore[operator]
- # error: Argument 1 to "__call__" of "ufunc" has incompatible type
- # "Union[Any, ExtensionArray, ndarray]"; expected
- # "Union[Union[int, float, complex, str, bytes, generic],
- # Sequence[Union[int, float, complex, str, bytes, generic]],
- # Sequence[Sequence[Any]], _SupportsArray]"
- both = np.logical_and(consider_values, not_midnight) # type: ignore[arg-type]
- even_days = both.sum() == 0
- if even_days:
- format = None
- else:
- format = "long"
- def _formatter(x):
- if x is None or (is_scalar(x) and isna(x)):
- return nat_rep
- if not isinstance(x, Timedelta):
- x = Timedelta(x)
- # Timedelta._repr_base uses string formatting (faster than strftime)
- result = x._repr_base(format=format)
- if box:
- result = f"'{result}'"
- return result
- return _formatter
- def _make_fixed_width(
- strings: list[str],
- justify: str = "right",
- minimum: int | None = None,
- adj: TextAdjustment | None = None,
- ) -> list[str]:
- if len(strings) == 0 or justify == "all":
- return strings
- if adj is None:
- adjustment = get_adjustment()
- else:
- adjustment = adj
- max_len = max(adjustment.len(x) for x in strings)
- if minimum is not None:
- max_len = max(minimum, max_len)
- conf_max = get_option("display.max_colwidth")
- if conf_max is not None and max_len > conf_max:
- max_len = conf_max
- def just(x: str) -> str:
- if conf_max is not None:
- if (conf_max > 3) & (adjustment.len(x) > max_len):
- x = x[: max_len - 3] + "..."
- return x
- strings = [just(x) for x in strings]
- result = adjustment.justify(strings, max_len, mode=justify)
- return result
- def _trim_zeros_complex(str_complexes: np.ndarray, decimal: str = ".") -> list[str]:
- """
- Separates the real and imaginary parts from the complex number, and
- executes the _trim_zeros_float method on each of those.
- """
- trimmed = [
- "".join(_trim_zeros_float(re.split(r"([j+-])", x), decimal))
- for x in str_complexes
- ]
- # pad strings to the length of the longest trimmed string for alignment
- lengths = [len(s) for s in trimmed]
- max_length = max(lengths)
- padded = [
- s[: -((k - 1) // 2 + 1)] # real part
- + (max_length - k) // 2 * "0"
- + s[-((k - 1) // 2 + 1) : -((k - 1) // 2)] # + / -
- + s[-((k - 1) // 2) : -1] # imaginary part
- + (max_length - k) // 2 * "0"
- + s[-1]
- for s, k in zip(trimmed, lengths)
- ]
- return padded
- def _trim_zeros_single_float(str_float: str) -> str:
- """
- Trims trailing zeros after a decimal point,
- leaving just one if necessary.
- """
- str_float = str_float.rstrip("0")
- if str_float.endswith("."):
- str_float += "0"
- return str_float
- def _trim_zeros_float(
- str_floats: np.ndarray | list[str], decimal: str = "."
- ) -> list[str]:
- """
- Trims the maximum number of trailing zeros equally from
- all numbers containing decimals, leaving just one if
- necessary.
- """
- trimmed = str_floats
- number_regex = re.compile(rf"^\s*[\+-]?[0-9]+\{decimal}[0-9]*$")
- def is_number_with_decimal(x) -> bool:
- return re.match(number_regex, x) is not None
- def should_trim(values: np.ndarray | list[str]) -> bool:
- """
- Determine if an array of strings should be trimmed.
- Returns True if all numbers containing decimals (defined by the
- above regular expression) within the array end in a zero, otherwise
- returns False.
- """
- numbers = [x for x in values if is_number_with_decimal(x)]
- return len(numbers) > 0 and all(x.endswith("0") for x in numbers)
- while should_trim(trimmed):
- trimmed = [x[:-1] if is_number_with_decimal(x) else x for x in trimmed]
- # leave one 0 after the decimal points if need be.
- result = [
- x + "0" if is_number_with_decimal(x) and x.endswith(decimal) else x
- for x in trimmed
- ]
- return result
- def _has_names(index: Index) -> bool:
- if isinstance(index, MultiIndex):
- return com.any_not_none(*index.names)
- else:
- return index.name is not None
- class EngFormatter:
- """
- Formats float values according to engineering format.
- Based on matplotlib.ticker.EngFormatter
- """
- # The SI engineering prefixes
- ENG_PREFIXES = {
- -24: "y",
- -21: "z",
- -18: "a",
- -15: "f",
- -12: "p",
- -9: "n",
- -6: "u",
- -3: "m",
- 0: "",
- 3: "k",
- 6: "M",
- 9: "G",
- 12: "T",
- 15: "P",
- 18: "E",
- 21: "Z",
- 24: "Y",
- }
- def __init__(
- self, accuracy: int | None = None, use_eng_prefix: bool = False
- ) -> None:
- self.accuracy = accuracy
- self.use_eng_prefix = use_eng_prefix
- def __call__(self, num: float) -> str:
- """
- Formats a number in engineering notation, appending a letter
- representing the power of 1000 of the original number. Some examples:
- >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True)
- >>> format_eng(0)
- ' 0'
- >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True)
- >>> format_eng(1_000_000)
- ' 1.0M'
- >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False)
- >>> format_eng("-1e-6")
- '-1.00E-06'
- @param num: the value to represent
- @type num: either a numeric value or a string that can be converted to
- a numeric value (as per decimal.Decimal constructor)
- @return: engineering formatted string
- """
- dnum = Decimal(str(num))
- if Decimal.is_nan(dnum):
- return "NaN"
- if Decimal.is_infinite(dnum):
- return "inf"
- sign = 1
- if dnum < 0: # pragma: no cover
- sign = -1
- dnum = -dnum
- if dnum != 0:
- pow10 = Decimal(int(math.floor(dnum.log10() / 3) * 3))
- else:
- pow10 = Decimal(0)
- pow10 = pow10.min(max(self.ENG_PREFIXES.keys()))
- pow10 = pow10.max(min(self.ENG_PREFIXES.keys()))
- int_pow10 = int(pow10)
- if self.use_eng_prefix:
- prefix = self.ENG_PREFIXES[int_pow10]
- else:
- if int_pow10 < 0:
- prefix = f"E-{-int_pow10:02d}"
- else:
- prefix = f"E+{int_pow10:02d}"
- mant = sign * dnum / (10**pow10)
- if self.accuracy is None: # pragma: no cover
- format_str = "{mant: g}{prefix}"
- else:
- format_str = f"{{mant: .{self.accuracy:d}f}}{{prefix}}"
- formatted = format_str.format(mant=mant, prefix=prefix)
- return formatted
- def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> None:
- """
- Format float representation in DataFrame with SI notation.
- Parameters
- ----------
- accuracy : int, default 3
- Number of decimal digits after the floating point.
- use_eng_prefix : bool, default False
- Whether to represent a value with SI prefixes.
- Returns
- -------
- None
- Examples
- --------
- >>> df = pd.DataFrame([1e-9, 1e-3, 1, 1e3, 1e6])
- >>> df
- 0
- 0 1.000000e-09
- 1 1.000000e-03
- 2 1.000000e+00
- 3 1.000000e+03
- 4 1.000000e+06
- >>> pd.set_eng_float_format(accuracy=1)
- >>> df
- 0
- 0 1.0E-09
- 1 1.0E-03
- 2 1.0E+00
- 3 1.0E+03
- 4 1.0E+06
- >>> pd.set_eng_float_format(use_eng_prefix=True)
- >>> df
- 0
- 0 1.000n
- 1 1.000m
- 2 1.000
- 3 1.000k
- 4 1.000M
- >>> pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
- >>> df
- 0
- 0 1.0n
- 1 1.0m
- 2 1.0
- 3 1.0k
- 4 1.0M
- >>> pd.set_option("display.float_format", None) # unset option
- """
- set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
- def get_level_lengths(
- levels: Any, sentinel: bool | object | str = ""
- ) -> list[dict[int, int]]:
- """
- For each index in each level the function returns lengths of indexes.
- Parameters
- ----------
- levels : list of lists
- List of values on for level.
- sentinel : string, optional
- Value which states that no new index starts on there.
- Returns
- -------
- Returns list of maps. For each level returns map of indexes (key is index
- in row and value is length of index).
- """
- if len(levels) == 0:
- return []
- control = [True] * len(levels[0])
- result = []
- for level in levels:
- last_index = 0
- lengths = {}
- for i, key in enumerate(level):
- if control[i] and key == sentinel:
- pass
- else:
- control[i] = False
- lengths[last_index] = i - last_index
- last_index = i
- lengths[last_index] = len(level) - last_index
- result.append(lengths)
- return result
- def buffer_put_lines(buf: WriteBuffer[str], lines: list[str]) -> None:
- """
- Appends lines to a buffer.
- Parameters
- ----------
- buf
- The buffer to write to
- lines
- The lines to append.
- """
- if any(isinstance(x, str) for x in lines):
- lines = [str(x) for x in lines]
- buf.write("\n".join(lines))
|