excel.py 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. """
  2. Utilities for conversion to writer-agnostic Excel representation.
  3. """
  4. from __future__ import annotations
  5. from functools import (
  6. lru_cache,
  7. reduce,
  8. )
  9. import itertools
  10. import re
  11. from typing import (
  12. Any,
  13. Callable,
  14. Hashable,
  15. Iterable,
  16. Mapping,
  17. Sequence,
  18. cast,
  19. )
  20. import warnings
  21. import numpy as np
  22. from pandas._libs.lib import is_list_like
  23. from pandas._typing import (
  24. IndexLabel,
  25. StorageOptions,
  26. )
  27. from pandas.util._decorators import doc
  28. from pandas.util._exceptions import find_stack_level
  29. from pandas.core.dtypes import missing
  30. from pandas.core.dtypes.common import (
  31. is_float,
  32. is_scalar,
  33. )
  34. from pandas import (
  35. DataFrame,
  36. Index,
  37. MultiIndex,
  38. PeriodIndex,
  39. )
  40. import pandas.core.common as com
  41. from pandas.core.shared_docs import _shared_docs
  42. from pandas.io.formats._color_data import CSS4_COLORS
  43. from pandas.io.formats.css import (
  44. CSSResolver,
  45. CSSWarning,
  46. )
  47. from pandas.io.formats.format import get_level_lengths
  48. from pandas.io.formats.printing import pprint_thing
  49. class ExcelCell:
  50. __fields__ = ("row", "col", "val", "style", "mergestart", "mergeend")
  51. __slots__ = __fields__
  52. def __init__(
  53. self,
  54. row: int,
  55. col: int,
  56. val,
  57. style=None,
  58. mergestart: int | None = None,
  59. mergeend: int | None = None,
  60. ) -> None:
  61. self.row = row
  62. self.col = col
  63. self.val = val
  64. self.style = style
  65. self.mergestart = mergestart
  66. self.mergeend = mergeend
  67. class CssExcelCell(ExcelCell):
  68. def __init__(
  69. self,
  70. row: int,
  71. col: int,
  72. val,
  73. style: dict | None,
  74. css_styles: dict[tuple[int, int], list[tuple[str, Any]]] | None,
  75. css_row: int,
  76. css_col: int,
  77. css_converter: Callable | None,
  78. **kwargs,
  79. ) -> None:
  80. if css_styles and css_converter:
  81. # Use dict to get only one (case-insensitive) declaration per property
  82. declaration_dict = {
  83. prop.lower(): val for prop, val in css_styles[css_row, css_col]
  84. }
  85. # Convert to frozenset for order-invariant caching
  86. unique_declarations = frozenset(declaration_dict.items())
  87. style = css_converter(unique_declarations)
  88. super().__init__(row=row, col=col, val=val, style=style, **kwargs)
  89. class CSSToExcelConverter:
  90. """
  91. A callable for converting CSS declarations to ExcelWriter styles
  92. Supports parts of CSS 2.2, with minimal CSS 3.0 support (e.g. text-shadow),
  93. focusing on font styling, backgrounds, borders and alignment.
  94. Operates by first computing CSS styles in a fairly generic
  95. way (see :meth:`compute_css`) then determining Excel style
  96. properties from CSS properties (see :meth:`build_xlstyle`).
  97. Parameters
  98. ----------
  99. inherited : str, optional
  100. CSS declarations understood to be the containing scope for the
  101. CSS processed by :meth:`__call__`.
  102. """
  103. NAMED_COLORS = CSS4_COLORS
  104. VERTICAL_MAP = {
  105. "top": "top",
  106. "text-top": "top",
  107. "middle": "center",
  108. "baseline": "bottom",
  109. "bottom": "bottom",
  110. "text-bottom": "bottom",
  111. # OpenXML also has 'justify', 'distributed'
  112. }
  113. BOLD_MAP = {
  114. "bold": True,
  115. "bolder": True,
  116. "600": True,
  117. "700": True,
  118. "800": True,
  119. "900": True,
  120. "normal": False,
  121. "lighter": False,
  122. "100": False,
  123. "200": False,
  124. "300": False,
  125. "400": False,
  126. "500": False,
  127. }
  128. ITALIC_MAP = {
  129. "normal": False,
  130. "italic": True,
  131. "oblique": True,
  132. }
  133. FAMILY_MAP = {
  134. "serif": 1, # roman
  135. "sans-serif": 2, # swiss
  136. "cursive": 4, # script
  137. "fantasy": 5, # decorative
  138. }
  139. BORDER_STYLE_MAP = {
  140. style.lower(): style
  141. for style in [
  142. "dashed",
  143. "mediumDashDot",
  144. "dashDotDot",
  145. "hair",
  146. "dotted",
  147. "mediumDashDotDot",
  148. "double",
  149. "dashDot",
  150. "slantDashDot",
  151. "mediumDashed",
  152. ]
  153. }
  154. # NB: Most of the methods here could be classmethods, as only __init__
  155. # and __call__ make use of instance attributes. We leave them as
  156. # instancemethods so that users can easily experiment with extensions
  157. # without monkey-patching.
  158. inherited: dict[str, str] | None
  159. def __init__(self, inherited: str | None = None) -> None:
  160. if inherited is not None:
  161. self.inherited = self.compute_css(inherited)
  162. else:
  163. self.inherited = None
  164. # We should avoid lru_cache on the __call__ method.
  165. # Otherwise once the method __call__ has been called
  166. # garbage collection no longer deletes the instance.
  167. self._call_cached = lru_cache(maxsize=None)(self._call_uncached)
  168. compute_css = CSSResolver()
  169. def __call__(
  170. self, declarations: str | frozenset[tuple[str, str]]
  171. ) -> dict[str, dict[str, str]]:
  172. """
  173. Convert CSS declarations to ExcelWriter style.
  174. Parameters
  175. ----------
  176. declarations : str | frozenset[tuple[str, str]]
  177. CSS string or set of CSS declaration tuples.
  178. e.g. "font-weight: bold; background: blue" or
  179. {("font-weight", "bold"), ("background", "blue")}
  180. Returns
  181. -------
  182. xlstyle : dict
  183. A style as interpreted by ExcelWriter when found in
  184. ExcelCell.style.
  185. """
  186. return self._call_cached(declarations)
  187. def _call_uncached(
  188. self, declarations: str | frozenset[tuple[str, str]]
  189. ) -> dict[str, dict[str, str]]:
  190. properties = self.compute_css(declarations, self.inherited)
  191. return self.build_xlstyle(properties)
  192. def build_xlstyle(self, props: Mapping[str, str]) -> dict[str, dict[str, str]]:
  193. out = {
  194. "alignment": self.build_alignment(props),
  195. "border": self.build_border(props),
  196. "fill": self.build_fill(props),
  197. "font": self.build_font(props),
  198. "number_format": self.build_number_format(props),
  199. }
  200. # TODO: handle cell width and height: needs support in pandas.io.excel
  201. def remove_none(d: dict[str, str | None]) -> None:
  202. """Remove key where value is None, through nested dicts"""
  203. for k, v in list(d.items()):
  204. if v is None:
  205. del d[k]
  206. elif isinstance(v, dict):
  207. remove_none(v)
  208. if not v:
  209. del d[k]
  210. remove_none(out)
  211. return out
  212. def build_alignment(self, props: Mapping[str, str]) -> dict[str, bool | str | None]:
  213. # TODO: text-indent, padding-left -> alignment.indent
  214. return {
  215. "horizontal": props.get("text-align"),
  216. "vertical": self._get_vertical_alignment(props),
  217. "wrap_text": self._get_is_wrap_text(props),
  218. }
  219. def _get_vertical_alignment(self, props: Mapping[str, str]) -> str | None:
  220. vertical_align = props.get("vertical-align")
  221. if vertical_align:
  222. return self.VERTICAL_MAP.get(vertical_align)
  223. return None
  224. def _get_is_wrap_text(self, props: Mapping[str, str]) -> bool | None:
  225. if props.get("white-space") is None:
  226. return None
  227. return bool(props["white-space"] not in ("nowrap", "pre", "pre-line"))
  228. def build_border(
  229. self, props: Mapping[str, str]
  230. ) -> dict[str, dict[str, str | None]]:
  231. return {
  232. side: {
  233. "style": self._border_style(
  234. props.get(f"border-{side}-style"),
  235. props.get(f"border-{side}-width"),
  236. self.color_to_excel(props.get(f"border-{side}-color")),
  237. ),
  238. "color": self.color_to_excel(props.get(f"border-{side}-color")),
  239. }
  240. for side in ["top", "right", "bottom", "left"]
  241. }
  242. def _border_style(self, style: str | None, width: str | None, color: str | None):
  243. # convert styles and widths to openxml, one of:
  244. # 'dashDot'
  245. # 'dashDotDot'
  246. # 'dashed'
  247. # 'dotted'
  248. # 'double'
  249. # 'hair'
  250. # 'medium'
  251. # 'mediumDashDot'
  252. # 'mediumDashDotDot'
  253. # 'mediumDashed'
  254. # 'slantDashDot'
  255. # 'thick'
  256. # 'thin'
  257. if width is None and style is None and color is None:
  258. # Return None will remove "border" from style dictionary
  259. return None
  260. if width is None and style is None:
  261. # Return "none" will keep "border" in style dictionary
  262. return "none"
  263. if style in ("none", "hidden"):
  264. return "none"
  265. width_name = self._get_width_name(width)
  266. if width_name is None:
  267. return "none"
  268. if style in (None, "groove", "ridge", "inset", "outset", "solid"):
  269. # not handled
  270. return width_name
  271. if style == "double":
  272. return "double"
  273. if style == "dotted":
  274. if width_name in ("hair", "thin"):
  275. return "dotted"
  276. return "mediumDashDotDot"
  277. if style == "dashed":
  278. if width_name in ("hair", "thin"):
  279. return "dashed"
  280. return "mediumDashed"
  281. elif style in self.BORDER_STYLE_MAP:
  282. # Excel-specific styles
  283. return self.BORDER_STYLE_MAP[style]
  284. else:
  285. warnings.warn(
  286. f"Unhandled border style format: {repr(style)}",
  287. CSSWarning,
  288. stacklevel=find_stack_level(),
  289. )
  290. return "none"
  291. def _get_width_name(self, width_input: str | None) -> str | None:
  292. width = self._width_to_float(width_input)
  293. if width < 1e-5:
  294. return None
  295. elif width < 1.3:
  296. return "thin"
  297. elif width < 2.8:
  298. return "medium"
  299. return "thick"
  300. def _width_to_float(self, width: str | None) -> float:
  301. if width is None:
  302. width = "2pt"
  303. return self._pt_to_float(width)
  304. def _pt_to_float(self, pt_string: str) -> float:
  305. assert pt_string.endswith("pt")
  306. return float(pt_string.rstrip("pt"))
  307. def build_fill(self, props: Mapping[str, str]):
  308. # TODO: perhaps allow for special properties
  309. # -excel-pattern-bgcolor and -excel-pattern-type
  310. fill_color = props.get("background-color")
  311. if fill_color not in (None, "transparent", "none"):
  312. return {"fgColor": self.color_to_excel(fill_color), "patternType": "solid"}
  313. def build_number_format(self, props: Mapping[str, str]) -> dict[str, str | None]:
  314. fc = props.get("number-format")
  315. fc = fc.replace("§", ";") if isinstance(fc, str) else fc
  316. return {"format_code": fc}
  317. def build_font(
  318. self, props: Mapping[str, str]
  319. ) -> dict[str, bool | float | str | None]:
  320. font_names = self._get_font_names(props)
  321. decoration = self._get_decoration(props)
  322. return {
  323. "name": font_names[0] if font_names else None,
  324. "family": self._select_font_family(font_names),
  325. "size": self._get_font_size(props),
  326. "bold": self._get_is_bold(props),
  327. "italic": self._get_is_italic(props),
  328. "underline": ("single" if "underline" in decoration else None),
  329. "strike": ("line-through" in decoration) or None,
  330. "color": self.color_to_excel(props.get("color")),
  331. # shadow if nonzero digit before shadow color
  332. "shadow": self._get_shadow(props),
  333. }
  334. def _get_is_bold(self, props: Mapping[str, str]) -> bool | None:
  335. weight = props.get("font-weight")
  336. if weight:
  337. return self.BOLD_MAP.get(weight)
  338. return None
  339. def _get_is_italic(self, props: Mapping[str, str]) -> bool | None:
  340. font_style = props.get("font-style")
  341. if font_style:
  342. return self.ITALIC_MAP.get(font_style)
  343. return None
  344. def _get_decoration(self, props: Mapping[str, str]) -> Sequence[str]:
  345. decoration = props.get("text-decoration")
  346. if decoration is not None:
  347. return decoration.split()
  348. else:
  349. return ()
  350. def _get_underline(self, decoration: Sequence[str]) -> str | None:
  351. if "underline" in decoration:
  352. return "single"
  353. return None
  354. def _get_shadow(self, props: Mapping[str, str]) -> bool | None:
  355. if "text-shadow" in props:
  356. return bool(re.search("^[^#(]*[1-9]", props["text-shadow"]))
  357. return None
  358. def _get_font_names(self, props: Mapping[str, str]) -> Sequence[str]:
  359. font_names_tmp = re.findall(
  360. r"""(?x)
  361. (
  362. "(?:[^"]|\\")+"
  363. |
  364. '(?:[^']|\\')+'
  365. |
  366. [^'",]+
  367. )(?=,|\s*$)
  368. """,
  369. props.get("font-family", ""),
  370. )
  371. font_names = []
  372. for name in font_names_tmp:
  373. if name[:1] == '"':
  374. name = name[1:-1].replace('\\"', '"')
  375. elif name[:1] == "'":
  376. name = name[1:-1].replace("\\'", "'")
  377. else:
  378. name = name.strip()
  379. if name:
  380. font_names.append(name)
  381. return font_names
  382. def _get_font_size(self, props: Mapping[str, str]) -> float | None:
  383. size = props.get("font-size")
  384. if size is None:
  385. return size
  386. return self._pt_to_float(size)
  387. def _select_font_family(self, font_names) -> int | None:
  388. family = None
  389. for name in font_names:
  390. family = self.FAMILY_MAP.get(name)
  391. if family:
  392. break
  393. return family
  394. def color_to_excel(self, val: str | None) -> str | None:
  395. if val is None:
  396. return None
  397. if self._is_hex_color(val):
  398. return self._convert_hex_to_excel(val)
  399. try:
  400. return self.NAMED_COLORS[val]
  401. except KeyError:
  402. warnings.warn(
  403. f"Unhandled color format: {repr(val)}",
  404. CSSWarning,
  405. stacklevel=find_stack_level(),
  406. )
  407. return None
  408. def _is_hex_color(self, color_string: str) -> bool:
  409. return bool(color_string.startswith("#"))
  410. def _convert_hex_to_excel(self, color_string: str) -> str:
  411. code = color_string.lstrip("#")
  412. if self._is_shorthand_color(color_string):
  413. return (code[0] * 2 + code[1] * 2 + code[2] * 2).upper()
  414. else:
  415. return code.upper()
  416. def _is_shorthand_color(self, color_string: str) -> bool:
  417. """Check if color code is shorthand.
  418. #FFF is a shorthand as opposed to full #FFFFFF.
  419. """
  420. code = color_string.lstrip("#")
  421. if len(code) == 3:
  422. return True
  423. elif len(code) == 6:
  424. return False
  425. else:
  426. raise ValueError(f"Unexpected color {color_string}")
  427. class ExcelFormatter:
  428. """
  429. Class for formatting a DataFrame to a list of ExcelCells,
  430. Parameters
  431. ----------
  432. df : DataFrame or Styler
  433. na_rep: na representation
  434. float_format : str, default None
  435. Format string for floating point numbers
  436. cols : sequence, optional
  437. Columns to write
  438. header : bool or sequence of str, default True
  439. Write out column names. If a list of string is given it is
  440. assumed to be aliases for the column names
  441. index : bool, default True
  442. output row names (index)
  443. index_label : str or sequence, default None
  444. Column label for index column(s) if desired. If None is given, and
  445. `header` and `index` are True, then the index names are used. A
  446. sequence should be given if the DataFrame uses MultiIndex.
  447. merge_cells : bool, default False
  448. Format MultiIndex and Hierarchical Rows as merged cells.
  449. inf_rep : str, default `'inf'`
  450. representation for np.inf values (which aren't representable in Excel)
  451. A `'-'` sign will be added in front of -inf.
  452. style_converter : callable, optional
  453. This translates Styler styles (CSS) into ExcelWriter styles.
  454. Defaults to ``CSSToExcelConverter()``.
  455. It should have signature css_declarations string -> excel style.
  456. This is only called for body cells.
  457. """
  458. max_rows = 2**20
  459. max_cols = 2**14
  460. def __init__(
  461. self,
  462. df,
  463. na_rep: str = "",
  464. float_format: str | None = None,
  465. cols: Sequence[Hashable] | None = None,
  466. header: Sequence[Hashable] | bool = True,
  467. index: bool = True,
  468. index_label: IndexLabel | None = None,
  469. merge_cells: bool = False,
  470. inf_rep: str = "inf",
  471. style_converter: Callable | None = None,
  472. ) -> None:
  473. self.rowcounter = 0
  474. self.na_rep = na_rep
  475. if not isinstance(df, DataFrame):
  476. self.styler = df
  477. self.styler._compute() # calculate applied styles
  478. df = df.data
  479. if style_converter is None:
  480. style_converter = CSSToExcelConverter()
  481. self.style_converter: Callable | None = style_converter
  482. else:
  483. self.styler = None
  484. self.style_converter = None
  485. self.df = df
  486. if cols is not None:
  487. # all missing, raise
  488. if not len(Index(cols).intersection(df.columns)):
  489. raise KeyError("passes columns are not ALL present dataframe")
  490. if len(Index(cols).intersection(df.columns)) != len(set(cols)):
  491. # Deprecated in GH#17295, enforced in 1.0.0
  492. raise KeyError("Not all names specified in 'columns' are found")
  493. self.df = df.reindex(columns=cols)
  494. self.columns = self.df.columns
  495. self.float_format = float_format
  496. self.index = index
  497. self.index_label = index_label
  498. self.header = header
  499. self.merge_cells = merge_cells
  500. self.inf_rep = inf_rep
  501. @property
  502. def header_style(self) -> dict[str, dict[str, str | bool]]:
  503. return {
  504. "font": {"bold": True},
  505. "borders": {
  506. "top": "thin",
  507. "right": "thin",
  508. "bottom": "thin",
  509. "left": "thin",
  510. },
  511. "alignment": {"horizontal": "center", "vertical": "top"},
  512. }
  513. def _format_value(self, val):
  514. if is_scalar(val) and missing.isna(val):
  515. val = self.na_rep
  516. elif is_float(val):
  517. if missing.isposinf_scalar(val):
  518. val = self.inf_rep
  519. elif missing.isneginf_scalar(val):
  520. val = f"-{self.inf_rep}"
  521. elif self.float_format is not None:
  522. val = float(self.float_format % val)
  523. if getattr(val, "tzinfo", None) is not None:
  524. raise ValueError(
  525. "Excel does not support datetimes with "
  526. "timezones. Please ensure that datetimes "
  527. "are timezone unaware before writing to Excel."
  528. )
  529. return val
  530. def _format_header_mi(self) -> Iterable[ExcelCell]:
  531. if self.columns.nlevels > 1:
  532. if not self.index:
  533. raise NotImplementedError(
  534. "Writing to Excel with MultiIndex columns and no "
  535. "index ('index'=False) is not yet implemented."
  536. )
  537. if not (self._has_aliases or self.header):
  538. return
  539. columns = self.columns
  540. level_strs = columns.format(
  541. sparsify=self.merge_cells, adjoin=False, names=False
  542. )
  543. level_lengths = get_level_lengths(level_strs)
  544. coloffset = 0
  545. lnum = 0
  546. if self.index and isinstance(self.df.index, MultiIndex):
  547. coloffset = len(self.df.index[0]) - 1
  548. if self.merge_cells:
  549. # Format multi-index as a merged cells.
  550. for lnum, name in enumerate(columns.names):
  551. yield ExcelCell(
  552. row=lnum,
  553. col=coloffset,
  554. val=name,
  555. style=self.header_style,
  556. )
  557. for lnum, (spans, levels, level_codes) in enumerate(
  558. zip(level_lengths, columns.levels, columns.codes)
  559. ):
  560. values = levels.take(level_codes)
  561. for i, span_val in spans.items():
  562. mergestart, mergeend = None, None
  563. if span_val > 1:
  564. mergestart, mergeend = lnum, coloffset + i + span_val
  565. yield CssExcelCell(
  566. row=lnum,
  567. col=coloffset + i + 1,
  568. val=values[i],
  569. style=self.header_style,
  570. css_styles=getattr(self.styler, "ctx_columns", None),
  571. css_row=lnum,
  572. css_col=i,
  573. css_converter=self.style_converter,
  574. mergestart=mergestart,
  575. mergeend=mergeend,
  576. )
  577. else:
  578. # Format in legacy format with dots to indicate levels.
  579. for i, values in enumerate(zip(*level_strs)):
  580. v = ".".join(map(pprint_thing, values))
  581. yield CssExcelCell(
  582. row=lnum,
  583. col=coloffset + i + 1,
  584. val=v,
  585. style=self.header_style,
  586. css_styles=getattr(self.styler, "ctx_columns", None),
  587. css_row=lnum,
  588. css_col=i,
  589. css_converter=self.style_converter,
  590. )
  591. self.rowcounter = lnum
  592. def _format_header_regular(self) -> Iterable[ExcelCell]:
  593. if self._has_aliases or self.header:
  594. coloffset = 0
  595. if self.index:
  596. coloffset = 1
  597. if isinstance(self.df.index, MultiIndex):
  598. coloffset = len(self.df.index.names)
  599. colnames = self.columns
  600. if self._has_aliases:
  601. self.header = cast(Sequence, self.header)
  602. if len(self.header) != len(self.columns):
  603. raise ValueError(
  604. f"Writing {len(self.columns)} cols "
  605. f"but got {len(self.header)} aliases"
  606. )
  607. colnames = self.header
  608. for colindex, colname in enumerate(colnames):
  609. yield CssExcelCell(
  610. row=self.rowcounter,
  611. col=colindex + coloffset,
  612. val=colname,
  613. style=self.header_style,
  614. css_styles=getattr(self.styler, "ctx_columns", None),
  615. css_row=0,
  616. css_col=colindex,
  617. css_converter=self.style_converter,
  618. )
  619. def _format_header(self) -> Iterable[ExcelCell]:
  620. gen: Iterable[ExcelCell]
  621. if isinstance(self.columns, MultiIndex):
  622. gen = self._format_header_mi()
  623. else:
  624. gen = self._format_header_regular()
  625. gen2: Iterable[ExcelCell] = ()
  626. if self.df.index.names:
  627. row = [x if x is not None else "" for x in self.df.index.names] + [
  628. ""
  629. ] * len(self.columns)
  630. if reduce(lambda x, y: x and y, map(lambda x: x != "", row)):
  631. gen2 = (
  632. ExcelCell(self.rowcounter, colindex, val, self.header_style)
  633. for colindex, val in enumerate(row)
  634. )
  635. self.rowcounter += 1
  636. return itertools.chain(gen, gen2)
  637. def _format_body(self) -> Iterable[ExcelCell]:
  638. if isinstance(self.df.index, MultiIndex):
  639. return self._format_hierarchical_rows()
  640. else:
  641. return self._format_regular_rows()
  642. def _format_regular_rows(self) -> Iterable[ExcelCell]:
  643. if self._has_aliases or self.header:
  644. self.rowcounter += 1
  645. # output index and index_label?
  646. if self.index:
  647. # check aliases
  648. # if list only take first as this is not a MultiIndex
  649. if self.index_label and isinstance(
  650. self.index_label, (list, tuple, np.ndarray, Index)
  651. ):
  652. index_label = self.index_label[0]
  653. # if string good to go
  654. elif self.index_label and isinstance(self.index_label, str):
  655. index_label = self.index_label
  656. else:
  657. index_label = self.df.index.names[0]
  658. if isinstance(self.columns, MultiIndex):
  659. self.rowcounter += 1
  660. if index_label and self.header is not False:
  661. yield ExcelCell(self.rowcounter - 1, 0, index_label, self.header_style)
  662. # write index_values
  663. index_values = self.df.index
  664. if isinstance(self.df.index, PeriodIndex):
  665. index_values = self.df.index.to_timestamp()
  666. for idx, idxval in enumerate(index_values):
  667. yield CssExcelCell(
  668. row=self.rowcounter + idx,
  669. col=0,
  670. val=idxval,
  671. style=self.header_style,
  672. css_styles=getattr(self.styler, "ctx_index", None),
  673. css_row=idx,
  674. css_col=0,
  675. css_converter=self.style_converter,
  676. )
  677. coloffset = 1
  678. else:
  679. coloffset = 0
  680. yield from self._generate_body(coloffset)
  681. def _format_hierarchical_rows(self) -> Iterable[ExcelCell]:
  682. if self._has_aliases or self.header:
  683. self.rowcounter += 1
  684. gcolidx = 0
  685. if self.index:
  686. index_labels = self.df.index.names
  687. # check for aliases
  688. if self.index_label and isinstance(
  689. self.index_label, (list, tuple, np.ndarray, Index)
  690. ):
  691. index_labels = self.index_label
  692. # MultiIndex columns require an extra row
  693. # with index names (blank if None) for
  694. # unambiguous round-trip, unless not merging,
  695. # in which case the names all go on one row Issue #11328
  696. if isinstance(self.columns, MultiIndex) and self.merge_cells:
  697. self.rowcounter += 1
  698. # if index labels are not empty go ahead and dump
  699. if com.any_not_none(*index_labels) and self.header is not False:
  700. for cidx, name in enumerate(index_labels):
  701. yield ExcelCell(self.rowcounter - 1, cidx, name, self.header_style)
  702. if self.merge_cells:
  703. # Format hierarchical rows as merged cells.
  704. level_strs = self.df.index.format(
  705. sparsify=True, adjoin=False, names=False
  706. )
  707. level_lengths = get_level_lengths(level_strs)
  708. for spans, levels, level_codes in zip(
  709. level_lengths, self.df.index.levels, self.df.index.codes
  710. ):
  711. values = levels.take(
  712. level_codes,
  713. allow_fill=levels._can_hold_na,
  714. fill_value=levels._na_value,
  715. )
  716. for i, span_val in spans.items():
  717. mergestart, mergeend = None, None
  718. if span_val > 1:
  719. mergestart = self.rowcounter + i + span_val - 1
  720. mergeend = gcolidx
  721. yield CssExcelCell(
  722. row=self.rowcounter + i,
  723. col=gcolidx,
  724. val=values[i],
  725. style=self.header_style,
  726. css_styles=getattr(self.styler, "ctx_index", None),
  727. css_row=i,
  728. css_col=gcolidx,
  729. css_converter=self.style_converter,
  730. mergestart=mergestart,
  731. mergeend=mergeend,
  732. )
  733. gcolidx += 1
  734. else:
  735. # Format hierarchical rows with non-merged values.
  736. for indexcolvals in zip(*self.df.index):
  737. for idx, indexcolval in enumerate(indexcolvals):
  738. yield CssExcelCell(
  739. row=self.rowcounter + idx,
  740. col=gcolidx,
  741. val=indexcolval,
  742. style=self.header_style,
  743. css_styles=getattr(self.styler, "ctx_index", None),
  744. css_row=idx,
  745. css_col=gcolidx,
  746. css_converter=self.style_converter,
  747. )
  748. gcolidx += 1
  749. yield from self._generate_body(gcolidx)
  750. @property
  751. def _has_aliases(self) -> bool:
  752. """Whether the aliases for column names are present."""
  753. return is_list_like(self.header)
  754. def _generate_body(self, coloffset: int) -> Iterable[ExcelCell]:
  755. # Write the body of the frame data series by series.
  756. for colidx in range(len(self.columns)):
  757. series = self.df.iloc[:, colidx]
  758. for i, val in enumerate(series):
  759. yield CssExcelCell(
  760. row=self.rowcounter + i,
  761. col=colidx + coloffset,
  762. val=val,
  763. style=None,
  764. css_styles=getattr(self.styler, "ctx", None),
  765. css_row=i,
  766. css_col=colidx,
  767. css_converter=self.style_converter,
  768. )
  769. def get_formatted_cells(self) -> Iterable[ExcelCell]:
  770. for cell in itertools.chain(self._format_header(), self._format_body()):
  771. cell.val = self._format_value(cell.val)
  772. yield cell
  773. @doc(storage_options=_shared_docs["storage_options"])
  774. def write(
  775. self,
  776. writer,
  777. sheet_name: str = "Sheet1",
  778. startrow: int = 0,
  779. startcol: int = 0,
  780. freeze_panes: tuple[int, int] | None = None,
  781. engine: str | None = None,
  782. storage_options: StorageOptions = None,
  783. ) -> None:
  784. """
  785. writer : path-like, file-like, or ExcelWriter object
  786. File path or existing ExcelWriter
  787. sheet_name : str, default 'Sheet1'
  788. Name of sheet which will contain DataFrame
  789. startrow :
  790. upper left cell row to dump data frame
  791. startcol :
  792. upper left cell column to dump data frame
  793. freeze_panes : tuple of integer (length 2), default None
  794. Specifies the one-based bottommost row and rightmost column that
  795. is to be frozen
  796. engine : string, default None
  797. write engine to use if writer is a path - you can also set this
  798. via the options ``io.excel.xlsx.writer``,
  799. or ``io.excel.xlsm.writer``.
  800. {storage_options}
  801. .. versionadded:: 1.2.0
  802. """
  803. from pandas.io.excel import ExcelWriter
  804. num_rows, num_cols = self.df.shape
  805. if num_rows > self.max_rows or num_cols > self.max_cols:
  806. raise ValueError(
  807. f"This sheet is too large! Your sheet size is: {num_rows}, {num_cols} "
  808. f"Max sheet size is: {self.max_rows}, {self.max_cols}"
  809. )
  810. formatted_cells = self.get_formatted_cells()
  811. if isinstance(writer, ExcelWriter):
  812. need_save = False
  813. else:
  814. # error: Cannot instantiate abstract class 'ExcelWriter' with abstract
  815. # attributes 'engine', 'save', 'supported_extensions' and 'write_cells'
  816. writer = ExcelWriter( # type: ignore[abstract]
  817. writer, engine=engine, storage_options=storage_options
  818. )
  819. need_save = True
  820. try:
  821. writer._write_cells(
  822. formatted_cells,
  823. sheet_name,
  824. startrow=startrow,
  825. startcol=startcol,
  826. freeze_panes=freeze_panes,
  827. )
  828. finally:
  829. # make sure to close opened file handles
  830. if need_save:
  831. writer.close()