12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101 |
- from __future__ import annotations
- from abc import (
- ABC,
- abstractmethod,
- )
- import sys
- from textwrap import dedent
- from typing import (
- TYPE_CHECKING,
- Iterable,
- Iterator,
- Mapping,
- Sequence,
- )
- from pandas._config import get_option
- from pandas._typing import (
- Dtype,
- WriteBuffer,
- )
- from pandas.io.formats import format as fmt
- from pandas.io.formats.printing import pprint_thing
- if TYPE_CHECKING:
- from pandas import (
- DataFrame,
- Index,
- Series,
- )
# numpydoc fragment describing ``max_cols``; substituted into INFO_DOCSTRING
# for DataFrame.info. The description must be indented under the parameter
# name for numpydoc to treat it as the parameter body.
frame_max_cols_sub = dedent(
    """\
    max_cols : int, optional
        When to switch from the verbose to the truncated output. If the
        DataFrame has more than `max_cols` columns, the truncated output
        is used. By default, the setting in
        ``pandas.options.display.max_info_columns`` is used."""
)
# numpydoc fragment describing ``show_counts``; shared by the DataFrame and
# Series substitution tables.
show_counts_sub = dedent(
    """\
    show_counts : bool, optional
        Whether to show the non-null counts. By default, this is shown
        only if the DataFrame is smaller than
        ``pandas.options.display.max_info_rows`` and
        ``pandas.options.display.max_info_columns``. A value of True always
        shows the counts, and False never shows the counts."""
)
# Doctest examples substituted into the "Examples" section of DataFrame.info.
# Blank lines separate expected output from the following prose; without them
# doctest would read the prose as part of the expected output.
frame_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
    >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
    ...                   "float_col": float_values})
    >>> df
       int_col text_col  float_col
    0        1    alpha       0.00
    1        2     beta       0.25
    2        3    gamma       0.50
    3        4    delta       0.75
    4        5  epsilon       1.00

    Prints information of all columns:

    >>> df.info(verbose=True)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Data columns (total 3 columns):
     #   Column     Non-Null Count  Dtype
    ---  ------     --------------  -----
     0   int_col    5 non-null      int64
     1   text_col   5 non-null      object
     2   float_col  5 non-null      float64
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Prints a summary of columns count and its dtypes but not per column
    information:

    >>> df.info(verbose=False)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Columns: 3 entries, int_col to float_col
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Pipe output of DataFrame.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> df.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, specially
    useful for big DataFrames and fine-tune memory optimization:

    >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
    >>> df = pd.DataFrame({
    ...     'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
    ... })
    >>> df.info()
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 22.9+ MB

    >>> df.info(memory_usage='deep')
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
     #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
     0   column_1  1000000 non-null  object
     1   column_2  1000000 non-null  object
     2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 165.9 MB"""
)
# "See Also" entries for DataFrame.info; the continuation line is indented so
# it stays attached to the entry it belongs to.
frame_see_also_sub = dedent(
    """\
    DataFrame.describe: Generate descriptive statistics of DataFrame
        columns.
    DataFrame.memory_usage: Memory usage of DataFrame columns."""
)
# Substitution table used to fill the INFO_DOCSTRING placeholders for
# DataFrame.info.
frame_sub_kwargs = dict(
    klass="DataFrame",
    type_sub=" and columns",
    max_cols_sub=frame_max_cols_sub,
    show_counts_sub=show_counts_sub,
    examples_sub=frame_examples_sub,
    see_also_sub=frame_see_also_sub,
    version_added_sub="",
)
# Doctest examples substituted into the "Examples" section of Series.info.
# Blank lines separate expected output from the following prose; without them
# doctest would read the prose as part of the expected output.
series_examples_sub = dedent(
    """\
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> s = pd.Series(text_values, index=int_values)
    >>> s.info()
    <class 'pandas.core.series.Series'>
    Index: 5 entries, 1 to 5
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    5 non-null      object
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Prints a summary excluding information about its values:

    >>> s.info(verbose=False)
    <class 'pandas.core.series.Series'>
    Index: 5 entries, 1 to 5
    dtypes: object(1)
    memory usage: 80.0+ bytes

    Pipe output of Series.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> s.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, specially
    useful for big Series and fine-tune memory optimization:

    >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
    >>> s = pd.Series(np.random.choice(['a', 'b', 'c'], 10 ** 6))
    >>> s.info()
    <class 'pandas.core.series.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 7.6+ MB

    >>> s.info(memory_usage='deep')
    <class 'pandas.core.series.Series'>
    RangeIndex: 1000000 entries, 0 to 999999
    Series name: None
    Non-Null Count    Dtype
    --------------    -----
    1000000 non-null  object
    dtypes: object(1)
    memory usage: 55.3 MB"""
)
# "See Also" entries for Series.info.
series_see_also_sub = dedent(
    """\
    Series.describe: Generate descriptive statistics of Series.
    Series.memory_usage: Memory usage of Series."""
)
# Substitution table used to fill the INFO_DOCSTRING placeholders for
# Series.info. Series has no ``max_cols`` parameter, hence the empty entry.
series_sub_kwargs = dict(
    klass="Series",
    type_sub="",
    max_cols_sub="",
    show_counts_sub=show_counts_sub,
    examples_sub=series_examples_sub,
    see_also_sub=series_see_also_sub,
    version_added_sub="\n.. versionadded:: 1.4.0\n",
)
# Shared docstring template for DataFrame.info / Series.info. Placeholders in
# braces are filled from frame_sub_kwargs / series_sub_kwargs.
# The trailing backslash after {version_added_sub} swallows one newline so
# that an empty substitution leaves exactly one blank line before
# "Parameters".
INFO_DOCSTRING = dedent(
    """
    Print a concise summary of a {klass}.

    This method prints information about a {klass} including
    the index dtype{type_sub}, non-null values and memory usage.
    {version_added_sub}\

    Parameters
    ----------
    verbose : bool, optional
        Whether to print the full summary. By default, the setting in
        ``pandas.options.display.max_info_columns`` is followed.
    buf : writable buffer, defaults to sys.stdout
        Where to send the output. By default, the output is printed to
        sys.stdout. Pass a writable buffer if you need to further process
        the output.
    {max_cols_sub}
    memory_usage : bool, str, optional
        Specifies whether total memory usage of the {klass}
        elements (including the index) should be displayed. By default,
        this follows the ``pandas.options.display.memory_usage`` setting.

        True always show memory usage. False never shows memory usage.
        A value of 'deep' is equivalent to "True with deep introspection".
        Memory usage is shown in human-readable units (base-2
        representation). Without deep introspection a memory estimation is
        made based in column dtype and number of rows assuming values
        consume the same memory amount for corresponding dtypes. With deep
        memory introspection, a real memory usage calculation is performed
        at the cost of computational resources. See the
        :ref:`Frequently Asked Questions <df-memory-usage>` for more
        details.
    {show_counts_sub}

    Returns
    -------
    None
        This method prints a summary of a {klass} and returns None.

    See Also
    --------
    {see_also_sub}

    Examples
    --------
    {examples_sub}
    """
)
def _put_str(s: str | Dtype, space: int) -> str:
    """
    Force a value's string form to an exact width.

    Longer text is cut off on the right; shorter text is padded on the right
    with spaces.

    Parameters
    ----------
    s : Union[str, Dtype]
        Value to render; it is converted with ``str`` first.
    space : int
        Width the result must have.

    Returns
    -------
    str
        The text truncated or padded to ``space`` characters.

    Examples
    --------
    >>> pd.io.formats.info._put_str("panda", 6)
    'panda '
    >>> pd.io.formats.info._put_str("panda", 4)
    'pand'
    """
    text = str(s)
    clipped = text[:space]
    return clipped.ljust(space)
def _sizeof_fmt(num: float, size_qualifier: str) -> str:
    """
    Render a byte count using base-2 units.

    Parameters
    ----------
    num : int
        Size in bytes.
    size_qualifier : str
        Either empty, or '+' (if lower bound).

    Returns
    -------
    str
        Size with one decimal place, the qualifier and a unit name.

    Examples
    --------
    >>> _sizeof_fmt(23028, '')
    '22.5 KB'
    >>> _sizeof_fmt(23028, '+')
    '22.5+ KB'
    """
    for unit in ("bytes", "KB", "MB", "GB", "TB"):
        if num < 1024.0:
            break
        num /= 1024.0
    else:
        # Every smaller unit was exhausted without the value dropping
        # below 1024, so report in the largest unit.
        unit = "PB"
    return f"{num:3.1f}{size_qualifier} {unit}"
def _initialize_memory_usage(
    memory_usage: bool | str | None = None,
) -> bool | str:
    """
    Resolve the effective ``memory_usage`` setting.

    An explicit value is returned unchanged; ``None`` falls back to the
    ``display.memory_usage`` option.
    """
    if memory_usage is not None:
        return memory_usage
    return get_option("display.memory_usage")
class BaseInfo(ABC):
    """
    Common interface shared by DataFrameInfo and SeriesInfo.

    Parameters
    ----------
    data : DataFrame or Series
        Object being summarized.
    memory_usage : bool or str, optional
        If "deep", introspect the data deeply by interrogating object dtypes
        for system-level memory consumption, and include it in the returned
        values.
    """

    data: DataFrame | Series
    memory_usage: bool | str

    @property
    @abstractmethod
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes : sequence
            Dtype of each of the DataFrame's columns (or one series column).
        """

    @property
    @abstractmethod
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""

    @property
    @abstractmethod
    def non_null_counts(self) -> Sequence[int]:
        """Sequence of non-null counts for all columns or column (if series)."""

    @property
    @abstractmethod
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes.
        """

    @property
    def memory_usage_string(self) -> str:
        """Human readable memory usage, terminated by a newline."""
        formatted = _sizeof_fmt(self.memory_usage_bytes, self.size_qualifier)
        return f"{formatted}\n"

    @property
    def size_qualifier(self) -> str:
        """
        Suffix marking the reported size as a lower bound.

        Returns "+" when memory usage is requested without deep
        introspection and either an "object" dtype is present or the index
        reports that its memory usage is qualified; otherwise returns "".
        """
        if not self.memory_usage or self.memory_usage == "deep":
            return ""
        # size_qualifier is just a best effort; not guaranteed to catch
        # all cases (e.g., it misses categorical data even with object
        # categories)
        if (
            "object" in self.dtype_counts
            or self.data.index._is_memory_usage_qualified()
        ):
            return "+"
        return ""

    @abstractmethod
    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Write the summary to ``buf``; implemented by subclasses."""
class DataFrameInfo(BaseInfo):
    """
    Info provider for a DataFrame.

    Parameters
    ----------
    data : DataFrame
        Frame to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` defers to the display option.
    """

    def __init__(
        self,
        data: DataFrame,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: DataFrame = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype name - number of columns with that dtype."""
        return _get_dataframe_dtype_counts(self.data)

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """
        Dtypes.

        Returns
        -------
        dtypes
            Dtype of each of the DataFrame's columns.
        """
        return self.data.dtypes

    @property
    def ids(self) -> Index:
        """
        Column names.

        Returns
        -------
        ids : Index
            DataFrame's column names.
        """
        return self.data.columns

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return len(self.ids)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null count of every column, as returned by ``data.count()``."""
        return self.data.count()

    @property
    def memory_usage_bytes(self) -> int:
        """Total memory usage (index included), summed over all columns."""
        per_column = self.data.memory_usage(
            index=True, deep=self.memory_usage == "deep"
        )
        return per_column.sum()

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None,
        max_cols: int | None,
        verbose: bool | None,
        show_counts: bool | None,
    ) -> None:
        """Build a DataFrameInfoPrinter for this info and write to ``buf``."""
        DataFrameInfoPrinter(
            info=self,
            max_cols=max_cols,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)
class SeriesInfo(BaseInfo):
    """
    Info provider for a Series.

    Parameters
    ----------
    data : Series
        Series to summarize.
    memory_usage : bool or str, optional
        Memory usage setting; ``None`` defers to the display option.
    """

    def __init__(
        self,
        data: Series,
        memory_usage: bool | str | None = None,
    ) -> None:
        self.data: Series = data
        self.memory_usage = _initialize_memory_usage(memory_usage)

    def render(
        self,
        *,
        buf: WriteBuffer[str] | None = None,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        """
        Build a SeriesInfoPrinter for this info and write to ``buf``.

        Raises
        ------
        ValueError
            If ``max_cols`` is given; it only applies to DataFrame.info.
        """
        if max_cols is not None:
            raise ValueError(
                "Argument `max_cols` can only be passed "
                "in DataFrame.info, not Series.info"
            )
        SeriesInfoPrinter(
            info=self,
            verbose=verbose,
            show_counts=show_counts,
        ).to_buffer(buf)

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Single-element list holding the series' non-null count."""
        count = self.data.count()
        return [count]

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Single-element list holding the series' dtype."""
        dtype = self.data.dtypes
        return [dtype]

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype name - count, computed via a one-column frame."""
        # Imported here rather than at module level, as in the original.
        from pandas.core.frame import DataFrame

        as_frame = DataFrame(self.data)
        return _get_dataframe_dtype_counts(as_frame)

    @property
    def memory_usage_bytes(self) -> int:
        """
        Memory usage in bytes.

        Returns
        -------
        memory_usage_bytes : int
            Object's total memory usage in bytes (index included).
        """
        return self.data.memory_usage(
            index=True, deep=self.memory_usage == "deep"
        )
class InfoPrinterAbstract(ABC):
    """
    Class for printing dataframe or series info.

    Derives from ABC so that ``@abstractmethod`` is actually enforced:
    a subclass that does not implement ``_create_table_builder`` cannot be
    instantiated.
    """

    def to_buffer(self, buf: WriteBuffer[str] | None = None) -> None:
        """
        Write the info lines into a buffer.

        Parameters
        ----------
        buf : writable buffer, optional
            Destination for the output; ``sys.stdout`` is used when omitted.
        """
        table_builder = self._create_table_builder()
        lines = table_builder.get_lines()
        if buf is None:  # pragma: no cover
            buf = sys.stdout
        fmt.buffer_put_lines(buf, lines)

    @abstractmethod
    def _create_table_builder(self) -> TableBuilderAbstract:
        """Create instance of table builder."""
class DataFrameInfoPrinter(InfoPrinterAbstract):
    """
    Class for printing dataframe info.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    max_cols : int, optional
        When to switch from the verbose to the truncated output.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: DataFrameInfo,
        max_cols: int | None = None,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        # max_cols has to be resolved before show_counts: the show_counts
        # default consults exceeds_info_cols, which reads self.max_cols.
        self.max_cols = self._initialize_max_cols(max_cols)
        self.show_counts = self._initialize_show_counts(show_counts)

    @property
    def max_rows(self) -> int:
        """Maximum number of rows for which non-null counts are shown."""
        # get_option has no "default" parameter; a second positional
        # argument is not a fallback value, so only the key is passed.
        limit = get_option("display.max_info_rows")
        if limit is None:
            # Defensive: treat an unset limit as "no limit" by returning a
            # bound the frame cannot exceed.
            return len(self.data) + 1
        return limit

    @property
    def exceeds_info_cols(self) -> bool:
        """Check if number of columns to be summarized exceeds the maximum."""
        return bool(self.col_count > self.max_cols)

    @property
    def exceeds_info_rows(self) -> bool:
        """Check if number of rows to be summarized exceeds the maximum."""
        return bool(len(self.data) > self.max_rows)

    @property
    def col_count(self) -> int:
        """Number of columns to be summarized."""
        return self.info.col_count

    def _initialize_max_cols(self, max_cols: int | None) -> int:
        """Return ``max_cols`` or, if None, ``display.max_info_columns``."""
        if max_cols is None:
            return get_option("display.max_info_columns")
        return max_cols

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """
        Return ``show_counts`` or, if None, derive it from the frame size.

        Counts are shown by default only when neither the column nor the
        row limit is exceeded.
        """
        if show_counts is None:
            return bool(not self.exceeds_info_cols and not self.exceeds_info_rows)
        return show_counts

    def _create_table_builder(self) -> DataFrameTableBuilder:
        """
        Create instance of table builder based on verbosity and display settings.
        """
        if self.verbose:
            return DataFrameTableBuilderVerbose(
                info=self.info,
                with_counts=self.show_counts,
            )
        # Explicitly False (as opposed to None) always selects the short form;
        # otherwise the column limit decides.
        if self.verbose is False or self.exceeds_info_cols:
            return DataFrameTableBuilderNonVerbose(info=self.info)
        return DataFrameTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )
class SeriesInfoPrinter(InfoPrinterAbstract):
    """
    Printer producing the info output of a Series.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    verbose : bool, optional
        Whether to print the full summary.
    show_counts : bool, optional
        Whether to show the non-null counts.
    """

    def __init__(
        self,
        info: SeriesInfo,
        verbose: bool | None = None,
        show_counts: bool | None = None,
    ) -> None:
        self.info = info
        self.data = info.data
        self.verbose = verbose
        self.show_counts = self._initialize_show_counts(show_counts)

    def _create_table_builder(self) -> SeriesTableBuilder:
        """
        Create instance of table builder based on verbosity.

        The verbose builder is used unless ``verbose`` is set to a falsy
        value other than None.
        """
        if self.verbose is not None and not self.verbose:
            return SeriesTableBuilderNonVerbose(info=self.info)
        return SeriesTableBuilderVerbose(
            info=self.info,
            with_counts=self.show_counts,
        )

    def _initialize_show_counts(self, show_counts: bool | None) -> bool:
        """Return ``show_counts``, defaulting to True when it is None."""
        return True if show_counts is None else show_counts
class TableBuilderAbstract(ABC):
    """
    Base class of all info table builders.

    Subclasses accumulate output lines in ``_lines`` and read everything
    they need from ``info``.
    """

    _lines: list[str]
    info: BaseInfo

    @abstractmethod
    def get_lines(self) -> list[str]:
        """Product in a form of list of lines (strings)."""

    @property
    def data(self) -> DataFrame | Series:
        """Object being summarized, taken from ``info``."""
        return self.info.data

    @property
    def dtypes(self) -> Iterable[Dtype]:
        """Dtypes of each of the DataFrame's columns."""
        return self.info.dtypes

    @property
    def dtype_counts(self) -> Mapping[str, int]:
        """Mapping dtype - number of counts."""
        return self.info.dtype_counts

    @property
    def display_memory_usage(self) -> bool:
        """Whether to display memory usage."""
        return bool(self.info.memory_usage)

    @property
    def memory_usage_string(self) -> str:
        """Memory usage string with proper size qualifier."""
        return self.info.memory_usage_string

    @property
    def non_null_counts(self) -> Sequence[int]:
        """Non-null counts, taken from ``info``."""
        return self.info.non_null_counts

    def add_object_type_line(self) -> None:
        """Add line with the string form of the data's type to the table."""
        self._lines.append(f"{type(self.data)!s}")

    def add_index_range_line(self) -> None:
        """Add line with the index summary to the table."""
        index_summary = self.data.index._summary()
        self._lines.append(index_summary)

    def add_dtypes_line(self) -> None:
        """Add summary line listing each dtype with its count, sorted by name."""
        summary = ", ".join(
            f"{name}({count:d})" for name, count in sorted(self.dtype_counts.items())
        )
        self._lines.append(f"dtypes: {summary}")
class DataFrameTableBuilder(TableBuilderAbstract):
    """
    Base builder of the dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo.
        Instance of DataFrameInfo.
    """

    def __init__(self, *, info: DataFrameInfo) -> None:
        self.info: DataFrameInfo = info

    def get_lines(self) -> list[str]:
        """Build and return the output lines, starting from an empty list."""
        self._lines = []
        fill = (
            self._fill_empty_info
            if self.col_count == 0
            else self._fill_non_empty_info
        )
        fill()
        return self._lines

    def _fill_empty_info(self) -> None:
        """Add lines to the info table, pertaining to empty dataframe."""
        self.add_object_type_line()
        self.add_index_range_line()
        class_name = type(self.data).__name__
        self._lines.append(f"Empty {class_name}\n")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""

    @property
    def data(self) -> DataFrame:
        """DataFrame."""
        return self.info.data

    @property
    def ids(self) -> Index:
        """Dataframe columns."""
        return self.info.ids

    @property
    def col_count(self) -> int:
        """Number of dataframe columns to be summarized."""
        return self.info.col_count

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")
class DataFrameTableBuilderNonVerbose(DataFrameTableBuilder):
    """
    Builder of the short (non-verbose) dataframe info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_columns_summary_line(self) -> None:
        """Add the column index summary, labelled "Columns"."""
        columns_summary = self.ids._summary(name="Columns")
        self._lines.append(columns_summary)
class TableBuilderVerboseMixin(TableBuilderAbstract):
    """
    Shared behaviour of the verbose table builders.

    Provides the column-width computation and the header, separator and body
    line rendering; subclasses supply the headers and the row generators.
    """

    SPACING: str = " " * 2
    strrows: Sequence[Sequence[str]]
    gross_column_widths: Sequence[int]
    with_counts: bool

    @property
    @abstractmethod
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""

    @property
    def header_column_widths(self) -> Sequence[int]:
        """Widths of header columns (only titles)."""
        return list(map(len, self.headers))

    def _get_gross_column_widths(self) -> Sequence[int]:
        """Get widths of columns containing both headers and actual content."""
        body_widths = self._get_body_column_widths()
        header_widths = self.header_column_widths
        return [
            max(header_width, body_width)
            for header_width, body_width in zip(header_widths, body_widths)
        ]

    def _get_body_column_widths(self) -> Sequence[int]:
        """Get widths of table content columns."""
        # Transpose the rows so each tuple holds the cells of one column.
        columns: Sequence[Sequence[str]] = list(zip(*self.strrows))
        return [max(map(len, column)) for column in columns]

    def _gen_rows(self) -> Iterator[Sequence[str]]:
        """
        Return the iterator over row contents.

        Each element represents a row comprising a sequence of strings.
        """
        return (
            self._gen_rows_with_counts()
            if self.with_counts
            else self._gen_rows_without_counts()
        )

    @abstractmethod
    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""

    @abstractmethod
    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""

    def add_header_line(self) -> None:
        """Add the header titles, each padded to its column width."""
        cells = [
            _put_str(title, width)
            for title, width in zip(self.headers, self.gross_column_widths)
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_separator_line(self) -> None:
        """Add the dashed line; dashes span only the header title width."""
        cells = [
            _put_str("-" * title_width, width)
            for title_width, width in zip(
                self.header_column_widths, self.gross_column_widths
            )
        ]
        self._lines.append(self.SPACING.join(cells))

    def add_body_lines(self) -> None:
        """Add one line per row, each cell padded to its column width."""
        for row in self.strrows:
            cells = [
                _put_str(cell, width)
                for cell, width in zip(row, self.gross_column_widths)
            ]
            self._lines.append(self.SPACING.join(cells))

    def _gen_non_null_counts(self) -> Iterator[str]:
        """Iterator with string representation of non-null counts."""
        yield from (f"{count} non-null" for count in self.non_null_counts)

    def _gen_dtypes(self) -> Iterator[str]:
        """Iterator with string representation of column dtypes."""
        yield from map(pprint_thing, self.dtypes)
class DataFrameTableBuilderVerbose(DataFrameTableBuilder, TableBuilderVerboseMixin):
    """
    Builder of the verbose (per-column) dataframe info table.

    Parameters
    ----------
    info : DataFrameInfo
        Instance of DataFrameInfo.
    with_counts : bool
        Whether the table includes the non-null count column.
    """

    def __init__(
        self,
        *,
        info: DataFrameInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first: the width computation reads them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty dataframe."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_columns_summary_line,
            self.add_header_line,
            self.add_separator_line,
            self.add_body_lines,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        titles = [" # ", "Column", "Dtype"]
        if self.with_counts:
            titles.insert(2, "Non-Null Count")
        return titles

    def add_columns_summary_line(self) -> None:
        """Add the line stating the total number of columns."""
        total = self.col_count
        self._lines.append(f"Data columns (total {total} columns):")

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_dtypes(),
        )
        yield from rows

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        rows = zip(
            self._gen_line_numbers(),
            self._gen_columns(),
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
        yield from rows

    def _gen_line_numbers(self) -> Iterator[str]:
        """Iterator with string representation of column numbers."""
        yield from (f" {position}" for position in range(len(self.ids)))

    def _gen_columns(self) -> Iterator[str]:
        """Iterator with string representation of column names."""
        yield from map(pprint_thing, self.ids)
class SeriesTableBuilder(TableBuilderAbstract):
    """
    Base builder of the series info table.

    Parameters
    ----------
    info : SeriesInfo.
        Instance of SeriesInfo.
    """

    def __init__(self, *, info: SeriesInfo) -> None:
        self.info: SeriesInfo = info

    def get_lines(self) -> list[str]:
        """Build and return the output lines, starting from an empty list."""
        lines: list[str] = []
        self._lines = lines
        self._fill_non_empty_info()
        return self._lines

    @property
    def data(self) -> Series:
        """Series."""
        return self.info.data

    def add_memory_usage_line(self) -> None:
        """Add line containing memory usage."""
        usage = self.memory_usage_string
        self._lines.append(f"memory usage: {usage}")

    @abstractmethod
    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
class SeriesTableBuilderNonVerbose(SeriesTableBuilder):
    """
    Builder of the short (non-verbose) series info table.
    """

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        for add_line in (
            self.add_object_type_line,
            self.add_index_range_line,
            self.add_dtypes_line,
        ):
            add_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()
class SeriesTableBuilderVerbose(SeriesTableBuilder, TableBuilderVerboseMixin):
    """
    Series info table builder for verbose output.

    Parameters
    ----------
    info : SeriesInfo
        Instance of SeriesInfo.
    with_counts : bool
        Whether the table includes the non-null count column.
    """

    def __init__(
        self,
        *,
        info: SeriesInfo,
        with_counts: bool,
    ) -> None:
        self.info = info
        self.with_counts = with_counts
        # Rows are materialized first: the width computation reads them.
        self.strrows: Sequence[Sequence[str]] = list(self._gen_rows())
        self.gross_column_widths: Sequence[int] = self._get_gross_column_widths()

    def _fill_non_empty_info(self) -> None:
        """Add lines to the info table, pertaining to non-empty series."""
        self.add_object_type_line()
        self.add_index_range_line()
        self.add_series_name_line()
        self.add_header_line()
        self.add_separator_line()
        self.add_body_lines()
        self.add_dtypes_line()
        if self.display_memory_usage:
            self.add_memory_usage_line()

    def add_series_name_line(self) -> None:
        """Add the line stating the series name."""
        self._lines.append(f"Series name: {self.data.name}")

    @property
    def headers(self) -> Sequence[str]:
        """Headers names of the columns in verbose table."""
        if self.with_counts:
            return ["Non-Null Count", "Dtype"]
        return ["Dtype"]

    def _gen_rows_without_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data without counts."""
        # Each row must be a sequence of cells. Yielding the bare dtype
        # string would make the width and body code iterate over its
        # characters and render only the first letter of the dtype.
        for dtype in self._gen_dtypes():
            yield (dtype,)

    def _gen_rows_with_counts(self) -> Iterator[Sequence[str]]:
        """Iterator with string representation of body data with counts."""
        yield from zip(
            self._gen_non_null_counts(),
            self._gen_dtypes(),
        )
def _get_dataframe_dtype_counts(df: DataFrame) -> Mapping[str, int]:
    """
    Count how many columns of ``df`` have each dtype, keyed by dtype name.
    """
    occurrences = df.dtypes.value_counts()
    # groupby dtype.name to collect e.g. Categorical columns
    by_name = occurrences.groupby(lambda dtype: dtype.name)
    return by_name.sum()
|