_util.py 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. from __future__ import annotations
  2. from typing import (
  3. TYPE_CHECKING,
  4. Any,
  5. Callable,
  6. Hashable,
  7. Iterable,
  8. Literal,
  9. MutableMapping,
  10. Sequence,
  11. TypeVar,
  12. overload,
  13. )
  14. from pandas.compat._optional import import_optional_dependency
  15. from pandas.core.dtypes.common import (
  16. is_integer,
  17. is_list_like,
  18. )
  19. if TYPE_CHECKING:
  20. from pandas.io.excel._base import ExcelWriter
  21. ExcelWriter_t = type[ExcelWriter]
  22. usecols_func = TypeVar("usecols_func", bound=Callable[[Hashable], object])
  23. _writers: MutableMapping[str, ExcelWriter_t] = {}
  24. def register_writer(klass: ExcelWriter_t) -> None:
  25. """
  26. Add engine to the excel writer registry.io.excel.
  27. You must use this method to integrate with ``to_excel``.
  28. Parameters
  29. ----------
  30. klass : ExcelWriter
  31. """
  32. if not callable(klass):
  33. raise ValueError("Can only register callables as engines")
  34. engine_name = klass._engine
  35. _writers[engine_name] = klass
  36. def get_default_engine(ext: str, mode: Literal["reader", "writer"] = "reader") -> str:
  37. """
  38. Return the default reader/writer for the given extension.
  39. Parameters
  40. ----------
  41. ext : str
  42. The excel file extension for which to get the default engine.
  43. mode : str {'reader', 'writer'}
  44. Whether to get the default engine for reading or writing.
  45. Either 'reader' or 'writer'
  46. Returns
  47. -------
  48. str
  49. The default engine for the extension.
  50. """
  51. _default_readers = {
  52. "xlsx": "openpyxl",
  53. "xlsm": "openpyxl",
  54. "xlsb": "pyxlsb",
  55. "xls": "xlrd",
  56. "ods": "odf",
  57. }
  58. _default_writers = {
  59. "xlsx": "openpyxl",
  60. "xlsm": "openpyxl",
  61. "xlsb": "pyxlsb",
  62. "ods": "odf",
  63. }
  64. assert mode in ["reader", "writer"]
  65. if mode == "writer":
  66. # Prefer xlsxwriter over openpyxl if installed
  67. xlsxwriter = import_optional_dependency("xlsxwriter", errors="warn")
  68. if xlsxwriter:
  69. _default_writers["xlsx"] = "xlsxwriter"
  70. return _default_writers[ext]
  71. else:
  72. return _default_readers[ext]
  73. def get_writer(engine_name: str) -> ExcelWriter_t:
  74. try:
  75. return _writers[engine_name]
  76. except KeyError as err:
  77. raise ValueError(f"No Excel writer '{engine_name}'") from err
  78. def _excel2num(x: str) -> int:
  79. """
  80. Convert Excel column name like 'AB' to 0-based column index.
  81. Parameters
  82. ----------
  83. x : str
  84. The Excel column name to convert to a 0-based column index.
  85. Returns
  86. -------
  87. num : int
  88. The column index corresponding to the name.
  89. Raises
  90. ------
  91. ValueError
  92. Part of the Excel column name was invalid.
  93. """
  94. index = 0
  95. for c in x.upper().strip():
  96. cp = ord(c)
  97. if cp < ord("A") or cp > ord("Z"):
  98. raise ValueError(f"Invalid column name: {x}")
  99. index = index * 26 + cp - ord("A") + 1
  100. return index - 1
  101. def _range2cols(areas: str) -> list[int]:
  102. """
  103. Convert comma separated list of column names and ranges to indices.
  104. Parameters
  105. ----------
  106. areas : str
  107. A string containing a sequence of column ranges (or areas).
  108. Returns
  109. -------
  110. cols : list
  111. A list of 0-based column indices.
  112. Examples
  113. --------
  114. >>> _range2cols('A:E')
  115. [0, 1, 2, 3, 4]
  116. >>> _range2cols('A,C,Z:AB')
  117. [0, 2, 25, 26, 27]
  118. """
  119. cols: list[int] = []
  120. for rng in areas.split(","):
  121. if ":" in rng:
  122. rngs = rng.split(":")
  123. cols.extend(range(_excel2num(rngs[0]), _excel2num(rngs[1]) + 1))
  124. else:
  125. cols.append(_excel2num(rng))
  126. return cols
  127. @overload
  128. def maybe_convert_usecols(usecols: str | list[int]) -> list[int]:
  129. ...
  130. @overload
  131. def maybe_convert_usecols(usecols: list[str]) -> list[str]:
  132. ...
  133. @overload
  134. def maybe_convert_usecols(usecols: usecols_func) -> usecols_func:
  135. ...
  136. @overload
  137. def maybe_convert_usecols(usecols: None) -> None:
  138. ...
  139. def maybe_convert_usecols(
  140. usecols: str | list[int] | list[str] | usecols_func | None,
  141. ) -> None | list[int] | list[str] | usecols_func:
  142. """
  143. Convert `usecols` into a compatible format for parsing in `parsers.py`.
  144. Parameters
  145. ----------
  146. usecols : object
  147. The use-columns object to potentially convert.
  148. Returns
  149. -------
  150. converted : object
  151. The compatible format of `usecols`.
  152. """
  153. if usecols is None:
  154. return usecols
  155. if is_integer(usecols):
  156. raise ValueError(
  157. "Passing an integer for `usecols` is no longer supported. "
  158. "Please pass in a list of int from 0 to `usecols` inclusive instead."
  159. )
  160. if isinstance(usecols, str):
  161. return _range2cols(usecols)
  162. return usecols
  163. @overload
  164. def validate_freeze_panes(freeze_panes: tuple[int, int]) -> Literal[True]:
  165. ...
  166. @overload
  167. def validate_freeze_panes(freeze_panes: None) -> Literal[False]:
  168. ...
  169. def validate_freeze_panes(freeze_panes: tuple[int, int] | None) -> bool:
  170. if freeze_panes is not None:
  171. if len(freeze_panes) == 2 and all(
  172. isinstance(item, int) for item in freeze_panes
  173. ):
  174. return True
  175. raise ValueError(
  176. "freeze_panes must be of form (row, column) "
  177. "where row and column are integers"
  178. )
  179. # freeze_panes wasn't specified, return False so it won't be applied
  180. # to output sheet
  181. return False
  182. def fill_mi_header(
  183. row: list[Hashable], control_row: list[bool]
  184. ) -> tuple[list[Hashable], list[bool]]:
  185. """
  186. Forward fill blank entries in row but only inside the same parent index.
  187. Used for creating headers in Multiindex.
  188. Parameters
  189. ----------
  190. row : list
  191. List of items in a single row.
  192. control_row : list of bool
  193. Helps to determine if particular column is in same parent index as the
  194. previous value. Used to stop propagation of empty cells between
  195. different indexes.
  196. Returns
  197. -------
  198. Returns changed row and control_row
  199. """
  200. last = row[0]
  201. for i in range(1, len(row)):
  202. if not control_row[i]:
  203. last = row[i]
  204. if row[i] == "" or row[i] is None:
  205. row[i] = last
  206. else:
  207. control_row[i] = False
  208. last = row[i]
  209. return row, control_row
  210. def pop_header_name(
  211. row: list[Hashable], index_col: int | Sequence[int]
  212. ) -> tuple[Hashable | None, list[Hashable]]:
  213. """
  214. Pop the header name for MultiIndex parsing.
  215. Parameters
  216. ----------
  217. row : list
  218. The data row to parse for the header name.
  219. index_col : int, list
  220. The index columns for our data. Assumed to be non-null.
  221. Returns
  222. -------
  223. header_name : str
  224. The extracted header name.
  225. trimmed_row : list
  226. The original data row with the header name removed.
  227. """
  228. # Pop out header name and fill w/blank.
  229. if is_list_like(index_col):
  230. assert isinstance(index_col, Iterable)
  231. i = max(index_col)
  232. else:
  233. assert not isinstance(index_col, Iterable)
  234. i = index_col
  235. header_name = row[i]
  236. header_name = None if header_name == "" else header_name
  237. return header_name, row[:i] + [""] + row[i + 1 :]
  238. def combine_kwargs(engine_kwargs: dict[str, Any] | None, kwargs: dict) -> dict:
  239. """
  240. Used to combine two sources of kwargs for the backend engine.
  241. Use of kwargs is deprecated, this function is solely for use in 1.3 and should
  242. be removed in 1.4/2.0. Also _base.ExcelWriter.__new__ ensures either engine_kwargs
  243. or kwargs must be None or empty respectively.
  244. Parameters
  245. ----------
  246. engine_kwargs: dict
  247. kwargs to be passed through to the engine.
  248. kwargs: dict
  249. kwargs to be psased through to the engine (deprecated)
  250. Returns
  251. -------
  252. engine_kwargs combined with kwargs
  253. """
  254. if engine_kwargs is None:
  255. result = {}
  256. else:
  257. result = engine_kwargs.copy()
  258. result.update(kwargs)
  259. return result