123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207 |
- from __future__ import annotations
- from typing import Literal
- import warnings
- from pandas.util._exceptions import find_stack_level
- from pandas.core.dtypes.cast import maybe_box_native
- from pandas.core.dtypes.common import (
- is_extension_array_dtype,
- is_object_dtype,
- )
- from pandas import DataFrame
- from pandas.core import common as com
def to_dict(
    df: DataFrame,
    orient: Literal[
        "dict", "list", "series", "split", "tight", "records", "index"
    ] = "dict",
    into: type[dict] = dict,
    index: bool = True,
) -> dict | list[dict]:
    """
    Convert the DataFrame to a dictionary.

    The type of the key-value pairs can be customized with the parameters
    (see below).

    Parameters
    ----------
    df : DataFrame
        The frame to convert.
    orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
        Determines the type of the values of the dictionary. The value is
        lower-cased before it is compared, so matching is case-insensitive.

        - 'dict' (default) : dict like {column -> {index -> value}}
        - 'list' : dict like {column -> [values]}
        - 'series' : dict like {column -> Series(values)}
        - 'split' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
        - 'tight' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
          'index_names' -> [index.names], 'column_names' -> [column.names]}
        - 'records' : list like
          [{column -> value}, ... , {column -> value}]
        - 'index' : dict like {index -> {column -> value}}

        .. versionadded:: 1.4.0
            'tight' as an allowed value for the ``orient`` argument

    into : class, default dict
        The collections.abc.Mapping subclass used for all Mappings
        in the return value.  Can be the actual class or an empty
        instance of the mapping type you want.  If you want a
        collections.defaultdict, you must pass it initialized.

    index : bool, default True
        Whether to include the index item (and index_names item if `orient`
        is 'tight') in the returned dictionary. Can only be ``False``
        when `orient` is 'split' or 'tight'.

        .. versionadded:: 2.0.0

    Returns
    -------
    dict, list or collections.abc.Mapping
        Return a collections.abc.Mapping object representing the DataFrame.
        The resulting transformation depends on the `orient` parameter.

    Raises
    ------
    ValueError
        If ``index`` is ``False`` while `orient` is neither 'split' nor
        'tight', if `orient` is 'index' and the DataFrame index is not
        unique, or if `orient` is not one of the supported values.

    Warns
    -----
    UserWarning
        If the DataFrame columns are not unique.
    """
    if not df.columns.is_unique:
        warnings.warn(
            "DataFrame columns are not unique, some columns will be omitted.",
            UserWarning,
            stacklevel=find_stack_level(),
        )
    # GH16122
    into_c = com.standardize_mapping(into)

    #  error: Incompatible types in assignment (expression has type "str",
    # variable has type "Literal['dict', 'list', 'series', 'split', 'tight',
    # 'records', 'index']")
    orient = orient.lower()  # type: ignore[assignment]

    if not index and orient not in ["split", "tight"]:
        raise ValueError(
            "'index=False' is only valid when 'orient' is 'split' or 'tight'"
        )

    if orient == "series":
        # GH46470 Return quickly if orient series to avoid creating dtype objects
        return into_c((k, v) for k, v in df.items())

    # Positions of the columns whose values are passed through
    # ``maybe_box_native`` below: object-dtype and extension-array columns.
    box_native_indices = [
        i
        for i, col_dtype in enumerate(df.dtypes.values)
        if is_object_dtype(col_dtype) or is_extension_array_dtype(col_dtype)
    ]
    are_all_object_dtype_cols = len(box_native_indices) == len(df.dtypes)

    if orient == "dict":
        # Pass ``into`` by keyword: positional use of Series.to_dict arguments
        # is deprecated in newer pandas releases.
        return into_c((k, v.to_dict(into=into)) for k, v in df.items())

    elif orient == "list":
        object_dtype_indices_as_set = set(box_native_indices)
        return into_c(
            (
                k,
                list(map(maybe_box_native, v.tolist()))
                if i in object_dtype_indices_as_set
                else v.tolist(),
            )
            for i, (k, v) in enumerate(df.items())
        )

    elif orient == "split":
        data = df._create_data_for_split_and_tight_to_dict(
            are_all_object_dtype_cols, box_native_indices
        )

        return into_c(
            ((("index", df.index.tolist()),) if index else ())
            + (
                ("columns", df.columns.tolist()),
                ("data", data),
            )
        )

    elif orient == "tight":
        # The rows are built (and boxed element-wise) right here, so the
        # split/tight helper is deliberately not called: its result would be
        # discarded and every row would be materialised twice.
        return into_c(
            ((("index", df.index.tolist()),) if index else ())
            + (
                ("columns", df.columns.tolist()),
                (
                    "data",
                    [
                        list(map(maybe_box_native, t))
                        for t in df.itertuples(index=False, name=None)
                    ],
                ),
            )
            + ((("index_names", list(df.index.names)),) if index else ())
            + (("column_names", list(df.columns.names)),)
        )

    elif orient == "records":
        columns = df.columns.tolist()
        if are_all_object_dtype_cols:
            rows = (
                dict(zip(columns, row)) for row in df.itertuples(index=False, name=None)
            )
            return [
                into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows
            ]
        else:
            data = [
                into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None)
            ]
            if box_native_indices:
                # Only the flagged columns need boxing; patch them in place.
                object_dtype_indices_as_set = set(box_native_indices)
                object_dtype_cols = {
                    col
                    for i, col in enumerate(df.columns)
                    if i in object_dtype_indices_as_set
                }
                for row in data:
                    for col in object_dtype_cols:
                        row[col] = maybe_box_native(row[col])
            return data

    elif orient == "index":
        if not df.index.is_unique:
            raise ValueError("DataFrame index must be unique for orient='index'.")
        columns = df.columns.tolist()
        # With ``name=None`` each tuple is (index_label, value_0, value_1, ...).
        if are_all_object_dtype_cols:
            return into_c(
                (t[0], dict(zip(df.columns, map(maybe_box_native, t[1:]))))
                for t in df.itertuples(name=None)
            )
        elif box_native_indices:
            object_dtype_indices_as_set = set(box_native_indices)
            is_object_dtype_by_index = [
                i in object_dtype_indices_as_set for i in range(len(df.columns))
            ]
            return into_c(
                (
                    t[0],
                    {
                        columns[i]: maybe_box_native(v)
                        if is_object_dtype_by_index[i]
                        else v
                        for i, v in enumerate(t[1:])
                    },
                )
                for t in df.itertuples(name=None)
            )
        else:
            return into_c(
                (t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None)
            )

    else:
        raise ValueError(f"orient '{orient}' not understood")
|