123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332 |
- from __future__ import annotations
- from typing import (
- TYPE_CHECKING,
- Any,
- Callable,
- Hashable,
- Iterable,
- Literal,
- MutableMapping,
- Sequence,
- TypeVar,
- overload,
- )
- from pandas.compat._optional import import_optional_dependency
- from pandas.core.dtypes.common import (
- is_integer,
- is_list_like,
- )
if TYPE_CHECKING:
    # These names are used in annotations only, so they are resolved solely
    # when a static type checker analyses the module.
    from pandas.io.excel._base import ExcelWriter

    ExcelWriter_t = type[ExcelWriter]
    usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])

# Engine name -> writer class. Filled by ``register_writer`` and read by
# ``get_writer``.
_writers: MutableMapping[str, ExcelWriter_t] = {}
def register_writer(klass: ExcelWriter_t) -> None:
    """
    Register an Excel writer class in the engine registry.

    Writers have to be registered through this function in order to
    integrate with ``to_excel``.

    Parameters
    ----------
    klass : ExcelWriter
        The writer class to register. It is stored under the name held in
        its ``_engine`` attribute.

    Raises
    ------
    ValueError
        If ``klass`` is not callable.
    """
    if callable(klass):
        _writers[klass._engine] = klass
        return
    raise ValueError("Can only register callables as engines")
def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
    """
    Look up the engine used by default for an Excel file extension.

    Parameters
    ----------
    ext : str
        The excel file extension (no leading dot, e.g. ``"xlsx"``) for which
        to get the default engine.
    mode : str {'reader', 'writer'}
        Whether the engine is wanted for reading or for writing.

    Returns
    -------
    str
        The default engine for the extension.

    Raises
    ------
    KeyError
        If ``ext`` has no default engine for the requested ``mode``.
    """
    assert mode in ["reader", "writer"]

    if mode != "writer":
        reader_defaults = {
            "xlsx": "openpyxl",
            "xlsm": "openpyxl",
            "xlsb": "pyxlsb",
            "xls": "xlrd",
            "ods": "odf",
        }
        return reader_defaults[ext]

    writer_defaults = {
        "xlsx": "openpyxl",
        "xlsm": "openpyxl",
        "xlsb": "pyxlsb",
        "ods": "odf",
    }
    # xlsxwriter is preferred over openpyxl for .xlsx whenever it is installed.
    if import_optional_dependency("xlsxwriter", errors="warn"):
        writer_defaults["xlsx"] = "xlsxwriter"
    return writer_defaults[ext]
def get_writer(engine_name: str) -> ExcelWriter_t:
    """
    Fetch the writer class registered under ``engine_name``.

    Parameters
    ----------
    engine_name : str
        Name of the engine to look up in the writer registry.

    Returns
    -------
    ExcelWriter
        The writer class stored for that engine.

    Raises
    ------
    ValueError
        If no writer is registered under ``engine_name``.
    """
    try:
        writer = _writers[engine_name]
    except KeyError as err:
        raise ValueError(f"No Excel writer '{engine_name}'") from err
    return writer
def _excel2num(x: str) -> int:
    """
    Convert Excel column name like 'AB' to 0-based column index.

    The name is upper-cased and surrounding whitespace is ignored.

    Parameters
    ----------
    x : str
        The Excel column name to convert to a 0-based column index.

    Returns
    -------
    num : int
        The column index corresponding to the name.

    Raises
    ------
    ValueError
        The column name was empty, or part of it was not a letter A-Z.
    """
    name = x.upper().strip()
    if not name:
        # An empty name would otherwise skip the loop below and come back
        # as column -1 instead of being reported as invalid.
        raise ValueError(f"Invalid column name: {x}")

    index = 0
    for char in name:
        if not "A" <= char <= "Z":
            raise ValueError(f"Invalid column name: {x}")
        # Column names are bijective base-26 numbers: 'A' is 1, 'Z' is 26.
        index = index * 26 + ord(char) - ord("A") + 1

    return index - 1
def _range2cols(areas: str) -> list[int]:
    """
    Expand a comma separated list of column names and ranges into indices.

    Parameters
    ----------
    areas : str
        A string containing a sequence of column ranges (or areas), e.g.
        ``'A,C,Z:AB'``. A range ``'X:Y'`` includes both of its end columns.

    Returns
    -------
    cols : list
        A list of 0-based column indices.

    Examples
    --------
    >>> _range2cols('A:E')
    [0, 1, 2, 3, 4]

    >>> _range2cols('A,C,Z:AB')
    [0, 2, 25, 26, 27]
    """
    cols: list[int] = []

    for area in areas.split(","):
        if ":" not in area:
            cols.append(_excel2num(area))
            continue
        # Only the first two colon-separated parts define the range.
        first, last = area.split(":")[:2]
        cols.extend(range(_excel2num(first), _excel2num(last) + 1))

    return cols
@overload
def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
    ...


@overload
def maybe_convert_usecols(usecols: list[str]) -> list[str]:
    ...


@overload
def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
    ...


@overload
def maybe_convert_usecols(usecols: None) -> None:
    ...


def maybe_convert_usecols(
    usecols: str | list[int] | list[str] | usecols_func | None,
) -> None | list[int] | list[str] | usecols_func:
    """
    Normalise `usecols` into a format the parsers in `parsers.py` accept.

    Parameters
    ----------
    usecols : object
        The use-columns object to potentially convert. A string is read as
        Excel column names/ranges and expanded to 0-based indices; ``None``
        and every other non-integer value are handed back untouched.

    Returns
    -------
    converted : object
        The compatible format of `usecols`.

    Raises
    ------
    ValueError
        If `usecols` is a single integer.
    """
    if isinstance(usecols, str):
        return _range2cols(usecols)

    if is_integer(usecols):
        raise ValueError(
            "Passing an integer for `usecols` is no longer supported.  "
            "Please pass in a list of int from 0 to `usecols` inclusive instead."
        )

    # None, lists and callables need no conversion.
    return usecols
@overload
def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
    ...


@overload
def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
    ...


def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
    """
    Check a ``freeze_panes`` argument and report whether it should be applied.

    Parameters
    ----------
    freeze_panes : tuple of (int, int) or None
        The requested ``(row, column)`` pair, or None when nothing was
        requested.

    Returns
    -------
    bool
        True for a valid two-integer specification, False for None.

    Raises
    ------
    ValueError
        If ``freeze_panes`` is given but is not two integers.
    """
    if freeze_panes is None:
        # Nothing was specified, so it must not be applied to the output sheet.
        return False

    well_formed = len(freeze_panes) == 2 and all(
        isinstance(item, int) for item in freeze_panes
    )
    if not well_formed:
        raise ValueError(
            "freeze_panes must be of form (row, column) "
            "where row and column are integers"
        )
    return True
def fill_mi_header(
    row: list[Hashable], control_row: list[bool]
) -> tuple[list[Hashable], list[bool]]:
    """
    Forward fill blank entries in row but only inside the same parent index.

    Used for creating headers in Multiindex.

    Parameters
    ----------
    row : list
        List of items in a single row.
    control_row : list of bool
        Helps to determine if particular column is in same parent index as the
        previous value. Used to stop propagation of empty cells between
        different indexes.

    Returns
    -------
    tuple of (list, list of bool)
        The filled ``row`` and the updated ``control_row``. Both lists are
        modified in place and are the same objects that were passed in.
    """
    if not row:
        # An empty row has nothing to fill; reading row[0] would raise.
        return row, control_row

    last = row[0]
    for i in range(1, len(row)):
        value = row[i]

        if not control_row[i]:
            # This column belongs to a different parent, so the fill value
            # restarts here rather than carrying over from the left.
            last = value

        if value == "" or value is None:
            row[i] = last
        else:
            # A real value starts a new group for the rows processed later.
            control_row[i] = False
            last = value

    return row, control_row
def pop_header_name(
    row: list[Hashable], index_col: int | Sequence[int]
) -> tuple[Hashable | None, list[Hashable]]:
    """
    Extract the header name from a row for MultiIndex parsing.

    Parameters
    ----------
    row : list
        The data row to parse for the header name.
    index_col : int, list
        The index columns for our data. Assumed to be non-null. When several
        are given, the largest one marks the header name position.

    Returns
    -------
    header_name : Hashable or None
        The extracted header name, or None if that cell held an empty string.
    trimmed_row : list
        A copy of the data row with the header name replaced by ``""``.
    """
    if not is_list_like(index_col):
        assert not isinstance(index_col, Iterable)
        position = index_col
    else:
        assert isinstance(index_col, Iterable)
        position = max(index_col)

    popped = row[position]
    header_name = None if popped == "" else popped

    # Blank out the slot the header name was taken from.
    trimmed_row = row[:position] + [""] + row[position + 1 :]
    return header_name, trimmed_row
def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
    """
    Merge the two sources of keyword arguments for the backend engine.

    Use of kwargs is deprecated, this function is solely for use in 1.3 and should
    be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
    or kwargs must be None or empty respectively.

    Parameters
    ----------
    engine_kwargs: dict
        kwargs to be passed through to the engine.
    kwargs: dict
        kwargs to be passed through to the engine (deprecated)

    Returns
    -------
    dict
        A new dict holding engine_kwargs combined with kwargs; on a shared
        key the value from kwargs wins. Neither input is modified.
    """
    combined = {} if engine_kwargs is None else engine_kwargs.copy()
    combined.update(kwargs)
    return combined
|