123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626 |
- from __future__ import annotations
- import mmap
- from typing import (
- TYPE_CHECKING,
- Any,
- Tuple,
- cast,
- )
- import numpy as np
- from pandas._typing import (
- FilePath,
- ReadBuffer,
- Scalar,
- StorageOptions,
- WriteExcelBuffer,
- )
- from pandas.compat._optional import import_optional_dependency
- from pandas.util._decorators import doc
- from pandas.core.shared_docs import _shared_docs
- from pandas.io.excel._base import (
- BaseExcelReader,
- ExcelWriter,
- )
- from pandas.io.excel._util import (
- combine_kwargs,
- validate_freeze_panes,
- )
- if TYPE_CHECKING:
- from openpyxl.descriptors.serialisable import Serialisable
- from openpyxl.workbook import Workbook
- class OpenpyxlWriter(ExcelWriter):
- _engine = "openpyxl"
- _supported_extensions = (".xlsx", ".xlsm")
- def __init__(
- self,
- path: FilePath | WriteExcelBuffer | ExcelWriter,
- engine: str | None = None,
- date_format: str | None = None,
- datetime_format: str | None = None,
- mode: str = "w",
- storage_options: StorageOptions = None,
- if_sheet_exists: str | None = None,
- engine_kwargs: dict[str, Any] | None = None,
- **kwargs,
- ) -> None:
- # Use the openpyxl module as the Excel writer.
- from openpyxl.workbook import Workbook
- engine_kwargs = combine_kwargs(engine_kwargs, kwargs)
- super().__init__(
- path,
- mode=mode,
- storage_options=storage_options,
- if_sheet_exists=if_sheet_exists,
- engine_kwargs=engine_kwargs,
- )
- # ExcelWriter replaced "a" by "r+" to allow us to first read the excel file from
- # the file and later write to it
- if "r+" in self._mode: # Load from existing workbook
- from openpyxl import load_workbook
- try:
- self._book = load_workbook(self._handles.handle, **engine_kwargs)
- except TypeError:
- self._handles.handle.close()
- raise
- self._handles.handle.seek(0)
- else:
- # Create workbook object with default optimized_write=True.
- try:
- self._book = Workbook(**engine_kwargs)
- except TypeError:
- self._handles.handle.close()
- raise
- if self.book.worksheets:
- self.book.remove(self.book.worksheets[0])
- @property
- def book(self) -> Workbook:
- """
- Book instance of class openpyxl.workbook.Workbook.
- This attribute can be used to access engine-specific features.
- """
- return self._book
- @property
- def sheets(self) -> dict[str, Any]:
- """Mapping of sheet names to sheet objects."""
- result = {name: self.book[name] for name in self.book.sheetnames}
- return result
- def _save(self) -> None:
- """
- Save workbook to disk.
- """
- self.book.save(self._handles.handle)
- if "r+" in self._mode and not isinstance(self._handles.handle, mmap.mmap):
- # truncate file to the written content
- self._handles.handle.truncate()
- @classmethod
- def _convert_to_style_kwargs(cls, style_dict: dict) -> dict[str, Serialisable]:
- """
- Convert a style_dict to a set of kwargs suitable for initializing
- or updating-on-copy an openpyxl v2 style object.
- Parameters
- ----------
- style_dict : dict
- A dict with zero or more of the following keys (or their synonyms).
- 'font'
- 'fill'
- 'border' ('borders')
- 'alignment'
- 'number_format'
- 'protection'
- Returns
- -------
- style_kwargs : dict
- A dict with the same, normalized keys as ``style_dict`` but each
- value has been replaced with a native openpyxl style object of the
- appropriate class.
- """
- _style_key_map = {"borders": "border"}
- style_kwargs: dict[str, Serialisable] = {}
- for k, v in style_dict.items():
- k = _style_key_map.get(k, k)
- _conv_to_x = getattr(cls, f"_convert_to_{k}", lambda x: None)
- new_v = _conv_to_x(v)
- if new_v:
- style_kwargs[k] = new_v
- return style_kwargs
- @classmethod
- def _convert_to_color(cls, color_spec):
- """
- Convert ``color_spec`` to an openpyxl v2 Color object.
- Parameters
- ----------
- color_spec : str, dict
- A 32-bit ARGB hex string, or a dict with zero or more of the
- following keys.
- 'rgb'
- 'indexed'
- 'auto'
- 'theme'
- 'tint'
- 'index'
- 'type'
- Returns
- -------
- color : openpyxl.styles.Color
- """
- from openpyxl.styles import Color
- if isinstance(color_spec, str):
- return Color(color_spec)
- else:
- return Color(**color_spec)
- @classmethod
- def _convert_to_font(cls, font_dict):
- """
- Convert ``font_dict`` to an openpyxl v2 Font object.
- Parameters
- ----------
- font_dict : dict
- A dict with zero or more of the following keys (or their synonyms).
- 'name'
- 'size' ('sz')
- 'bold' ('b')
- 'italic' ('i')
- 'underline' ('u')
- 'strikethrough' ('strike')
- 'color'
- 'vertAlign' ('vertalign')
- 'charset'
- 'scheme'
- 'family'
- 'outline'
- 'shadow'
- 'condense'
- Returns
- -------
- font : openpyxl.styles.Font
- """
- from openpyxl.styles import Font
- _font_key_map = {
- "sz": "size",
- "b": "bold",
- "i": "italic",
- "u": "underline",
- "strike": "strikethrough",
- "vertalign": "vertAlign",
- }
- font_kwargs = {}
- for k, v in font_dict.items():
- k = _font_key_map.get(k, k)
- if k == "color":
- v = cls._convert_to_color(v)
- font_kwargs[k] = v
- return Font(**font_kwargs)
- @classmethod
- def _convert_to_stop(cls, stop_seq):
- """
- Convert ``stop_seq`` to a list of openpyxl v2 Color objects,
- suitable for initializing the ``GradientFill`` ``stop`` parameter.
- Parameters
- ----------
- stop_seq : iterable
- An iterable that yields objects suitable for consumption by
- ``_convert_to_color``.
- Returns
- -------
- stop : list of openpyxl.styles.Color
- """
- return map(cls._convert_to_color, stop_seq)
- @classmethod
- def _convert_to_fill(cls, fill_dict: dict[str, Any]):
- """
- Convert ``fill_dict`` to an openpyxl v2 Fill object.
- Parameters
- ----------
- fill_dict : dict
- A dict with one or more of the following keys (or their synonyms),
- 'fill_type' ('patternType', 'patterntype')
- 'start_color' ('fgColor', 'fgcolor')
- 'end_color' ('bgColor', 'bgcolor')
- or one or more of the following keys (or their synonyms).
- 'type' ('fill_type')
- 'degree'
- 'left'
- 'right'
- 'top'
- 'bottom'
- 'stop'
- Returns
- -------
- fill : openpyxl.styles.Fill
- """
- from openpyxl.styles import (
- GradientFill,
- PatternFill,
- )
- _pattern_fill_key_map = {
- "patternType": "fill_type",
- "patterntype": "fill_type",
- "fgColor": "start_color",
- "fgcolor": "start_color",
- "bgColor": "end_color",
- "bgcolor": "end_color",
- }
- _gradient_fill_key_map = {"fill_type": "type"}
- pfill_kwargs = {}
- gfill_kwargs = {}
- for k, v in fill_dict.items():
- pk = _pattern_fill_key_map.get(k)
- gk = _gradient_fill_key_map.get(k)
- if pk in ["start_color", "end_color"]:
- v = cls._convert_to_color(v)
- if gk == "stop":
- v = cls._convert_to_stop(v)
- if pk:
- pfill_kwargs[pk] = v
- elif gk:
- gfill_kwargs[gk] = v
- else:
- pfill_kwargs[k] = v
- gfill_kwargs[k] = v
- try:
- return PatternFill(**pfill_kwargs)
- except TypeError:
- return GradientFill(**gfill_kwargs)
- @classmethod
- def _convert_to_side(cls, side_spec):
- """
- Convert ``side_spec`` to an openpyxl v2 Side object.
- Parameters
- ----------
- side_spec : str, dict
- A string specifying the border style, or a dict with zero or more
- of the following keys (or their synonyms).
- 'style' ('border_style')
- 'color'
- Returns
- -------
- side : openpyxl.styles.Side
- """
- from openpyxl.styles import Side
- _side_key_map = {"border_style": "style"}
- if isinstance(side_spec, str):
- return Side(style=side_spec)
- side_kwargs = {}
- for k, v in side_spec.items():
- k = _side_key_map.get(k, k)
- if k == "color":
- v = cls._convert_to_color(v)
- side_kwargs[k] = v
- return Side(**side_kwargs)
- @classmethod
- def _convert_to_border(cls, border_dict):
- """
- Convert ``border_dict`` to an openpyxl v2 Border object.
- Parameters
- ----------
- border_dict : dict
- A dict with zero or more of the following keys (or their synonyms).
- 'left'
- 'right'
- 'top'
- 'bottom'
- 'diagonal'
- 'diagonal_direction'
- 'vertical'
- 'horizontal'
- 'diagonalUp' ('diagonalup')
- 'diagonalDown' ('diagonaldown')
- 'outline'
- Returns
- -------
- border : openpyxl.styles.Border
- """
- from openpyxl.styles import Border
- _border_key_map = {"diagonalup": "diagonalUp", "diagonaldown": "diagonalDown"}
- border_kwargs = {}
- for k, v in border_dict.items():
- k = _border_key_map.get(k, k)
- if k == "color":
- v = cls._convert_to_color(v)
- if k in ["left", "right", "top", "bottom", "diagonal"]:
- v = cls._convert_to_side(v)
- border_kwargs[k] = v
- return Border(**border_kwargs)
- @classmethod
- def _convert_to_alignment(cls, alignment_dict):
- """
- Convert ``alignment_dict`` to an openpyxl v2 Alignment object.
- Parameters
- ----------
- alignment_dict : dict
- A dict with zero or more of the following keys (or their synonyms).
- 'horizontal'
- 'vertical'
- 'text_rotation'
- 'wrap_text'
- 'shrink_to_fit'
- 'indent'
- Returns
- -------
- alignment : openpyxl.styles.Alignment
- """
- from openpyxl.styles import Alignment
- return Alignment(**alignment_dict)
- @classmethod
- def _convert_to_number_format(cls, number_format_dict):
- """
- Convert ``number_format_dict`` to an openpyxl v2.1.0 number format
- initializer.
- Parameters
- ----------
- number_format_dict : dict
- A dict with zero or more of the following keys.
- 'format_code' : str
- Returns
- -------
- number_format : str
- """
- return number_format_dict["format_code"]
- @classmethod
- def _convert_to_protection(cls, protection_dict):
- """
- Convert ``protection_dict`` to an openpyxl v2 Protection object.
- Parameters
- ----------
- protection_dict : dict
- A dict with zero or more of the following keys.
- 'locked'
- 'hidden'
- Returns
- -------
- """
- from openpyxl.styles import Protection
- return Protection(**protection_dict)
- def _write_cells(
- self,
- cells,
- sheet_name: str | None = None,
- startrow: int = 0,
- startcol: int = 0,
- freeze_panes: tuple[int, int] | None = None,
- ) -> None:
- # Write the frame cells using openpyxl.
- sheet_name = self._get_sheet_name(sheet_name)
- _style_cache: dict[str, dict[str, Serialisable]] = {}
- if sheet_name in self.sheets and self._if_sheet_exists != "new":
- if "r+" in self._mode:
- if self._if_sheet_exists == "replace":
- old_wks = self.sheets[sheet_name]
- target_index = self.book.index(old_wks)
- del self.book[sheet_name]
- wks = self.book.create_sheet(sheet_name, target_index)
- elif self._if_sheet_exists == "error":
- raise ValueError(
- f"Sheet '{sheet_name}' already exists and "
- f"if_sheet_exists is set to 'error'."
- )
- elif self._if_sheet_exists == "overlay":
- wks = self.sheets[sheet_name]
- else:
- raise ValueError(
- f"'{self._if_sheet_exists}' is not valid for if_sheet_exists. "
- "Valid options are 'error', 'new', 'replace' and 'overlay'."
- )
- else:
- wks = self.sheets[sheet_name]
- else:
- wks = self.book.create_sheet()
- wks.title = sheet_name
- if validate_freeze_panes(freeze_panes):
- freeze_panes = cast(Tuple[int, int], freeze_panes)
- wks.freeze_panes = wks.cell(
- row=freeze_panes[0] + 1, column=freeze_panes[1] + 1
- )
- for cell in cells:
- xcell = wks.cell(
- row=startrow + cell.row + 1, column=startcol + cell.col + 1
- )
- xcell.value, fmt = self._value_with_fmt(cell.val)
- if fmt:
- xcell.number_format = fmt
- style_kwargs: dict[str, Serialisable] | None = {}
- if cell.style:
- key = str(cell.style)
- style_kwargs = _style_cache.get(key)
- if style_kwargs is None:
- style_kwargs = self._convert_to_style_kwargs(cell.style)
- _style_cache[key] = style_kwargs
- if style_kwargs:
- for k, v in style_kwargs.items():
- setattr(xcell, k, v)
- if cell.mergestart is not None and cell.mergeend is not None:
- wks.merge_cells(
- start_row=startrow + cell.row + 1,
- start_column=startcol + cell.col + 1,
- end_column=startcol + cell.mergeend + 1,
- end_row=startrow + cell.mergestart + 1,
- )
- # When cells are merged only the top-left cell is preserved
- # The behaviour of the other cells in a merged range is
- # undefined
- if style_kwargs:
- first_row = startrow + cell.row + 1
- last_row = startrow + cell.mergestart + 1
- first_col = startcol + cell.col + 1
- last_col = startcol + cell.mergeend + 1
- for row in range(first_row, last_row + 1):
- for col in range(first_col, last_col + 1):
- if row == first_row and col == first_col:
- # Ignore first cell. It is already handled.
- continue
- xcell = wks.cell(column=col, row=row)
- for k, v in style_kwargs.items():
- setattr(xcell, k, v)
- class OpenpyxlReader(BaseExcelReader):
- @doc(storage_options=_shared_docs["storage_options"])
- def __init__(
- self,
- filepath_or_buffer: FilePath | ReadBuffer[bytes],
- storage_options: StorageOptions = None,
- ) -> None:
- """
- Reader using openpyxl engine.
- Parameters
- ----------
- filepath_or_buffer : str, path object or Workbook
- Object to be parsed.
- {storage_options}
- """
- import_optional_dependency("openpyxl")
- super().__init__(filepath_or_buffer, storage_options=storage_options)
- @property
- def _workbook_class(self):
- from openpyxl import Workbook
- return Workbook
- def load_workbook(self, filepath_or_buffer: FilePath | ReadBuffer[bytes]):
- from openpyxl import load_workbook
- return load_workbook(
- filepath_or_buffer, read_only=True, data_only=True, keep_links=False
- )
- @property
- def sheet_names(self) -> list[str]:
- return [sheet.title for sheet in self.book.worksheets]
- def get_sheet_by_name(self, name: str):
- self.raise_if_bad_sheet_by_name(name)
- return self.book[name]
- def get_sheet_by_index(self, index: int):
- self.raise_if_bad_sheet_by_index(index)
- return self.book.worksheets[index]
- def _convert_cell(self, cell) -> Scalar:
- from openpyxl.cell.cell import (
- TYPE_ERROR,
- TYPE_NUMERIC,
- )
- if cell.value is None:
- return "" # compat with xlrd
- elif cell.data_type == TYPE_ERROR:
- return np.nan
- elif cell.data_type == TYPE_NUMERIC:
- val = int(cell.value)
- if val == cell.value:
- return val
- return float(cell.value)
- return cell.value
- def get_sheet_data(
- self, sheet, file_rows_needed: int | None = None
- ) -> list[list[Scalar]]:
- if self.book.read_only:
- sheet.reset_dimensions()
- data: list[list[Scalar]] = []
- last_row_with_data = -1
- for row_number, row in enumerate(sheet.rows):
- converted_row = [self._convert_cell(cell) for cell in row]
- while converted_row and converted_row[-1] == "":
- # trim trailing empty elements
- converted_row.pop()
- if converted_row:
- last_row_with_data = row_number
- data.append(converted_row)
- if file_rows_needed is not None and len(data) >= file_rows_needed:
- break
- # Trim trailing empty rows
- data = data[: last_row_with_data + 1]
- if len(data) > 0:
- # extend rows to max width
- max_width = max(len(data_row) for data_row in data)
- if min(len(data_row) for data_row in data) < max_width:
- empty_cell: list[Scalar] = [""]
- data = [
- data_row + (max_width - len(data_row)) * empty_cell
- for data_row in data
- ]
- return data
|