123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398 |
- import contextlib
- from pathlib import Path
- import re
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import DataFrame
- import pandas._testing as tm
- from pandas.io.excel import (
- ExcelWriter,
- _OpenpyxlWriter,
- )
- openpyxl = pytest.importorskip("openpyxl")
- pytestmark = pytest.mark.parametrize("ext", [".xlsx"])
- def test_to_excel_styleconverter(ext):
- from openpyxl import styles
- hstyle = {
- "font": {"color": "00FF0000", "bold": True},
- "borders": {"top": "thin", "right": "thin", "bottom": "thin", "left": "thin"},
- "alignment": {"horizontal": "center", "vertical": "top"},
- "fill": {"patternType": "solid", "fgColor": {"rgb": "006666FF", "tint": 0.3}},
- "number_format": {"format_code": "0.00"},
- "protection": {"locked": True, "hidden": False},
- }
- font_color = styles.Color("00FF0000")
- font = styles.Font(bold=True, color=font_color)
- side = styles.Side(style=styles.borders.BORDER_THIN)
- border = styles.Border(top=side, right=side, bottom=side, left=side)
- alignment = styles.Alignment(horizontal="center", vertical="top")
- fill_color = styles.Color(rgb="006666FF", tint=0.3)
- fill = styles.PatternFill(patternType="solid", fgColor=fill_color)
- number_format = "0.00"
- protection = styles.Protection(locked=True, hidden=False)
- kw = _OpenpyxlWriter._convert_to_style_kwargs(hstyle)
- assert kw["font"] == font
- assert kw["border"] == border
- assert kw["alignment"] == alignment
- assert kw["fill"] == fill
- assert kw["number_format"] == number_format
- assert kw["protection"] == protection
- def test_write_cells_merge_styled(ext):
- from pandas.io.formats.excel import ExcelCell
- sheet_name = "merge_styled"
- sty_b1 = {"font": {"color": "00FF0000"}}
- sty_a2 = {"font": {"color": "0000FF00"}}
- initial_cells = [
- ExcelCell(col=1, row=0, val=42, style=sty_b1),
- ExcelCell(col=0, row=1, val=99, style=sty_a2),
- ]
- sty_merged = {"font": {"color": "000000FF", "bold": True}}
- sty_kwargs = _OpenpyxlWriter._convert_to_style_kwargs(sty_merged)
- openpyxl_sty_merged = sty_kwargs["font"]
- merge_cells = [
- ExcelCell(
- col=0, row=0, val="pandas", mergestart=1, mergeend=1, style=sty_merged
- )
- ]
- with tm.ensure_clean(ext) as path:
- with _OpenpyxlWriter(path) as writer:
- writer._write_cells(initial_cells, sheet_name=sheet_name)
- writer._write_cells(merge_cells, sheet_name=sheet_name)
- wks = writer.sheets[sheet_name]
- xcell_b1 = wks["B1"]
- xcell_a2 = wks["A2"]
- assert xcell_b1.font == openpyxl_sty_merged
- assert xcell_a2.font == openpyxl_sty_merged
- @pytest.mark.parametrize("iso_dates", [True, False])
- def test_engine_kwargs_write(ext, iso_dates):
- # GH 42286 GH 43445
- engine_kwargs = {"iso_dates": iso_dates}
- with tm.ensure_clean(ext) as f:
- with ExcelWriter(f, engine="openpyxl", engine_kwargs=engine_kwargs) as writer:
- assert writer.book.iso_dates == iso_dates
- # ExcelWriter won't allow us to close without writing something
- DataFrame().to_excel(writer)
- def test_engine_kwargs_append_invalid(ext):
- # GH 43445
- # test whether an invalid engine kwargs actually raises
- with tm.ensure_clean(ext) as f:
- DataFrame(["hello", "world"]).to_excel(f)
- with pytest.raises(
- TypeError,
- match=re.escape(
- "load_workbook() got an unexpected keyword argument 'apple_banana'"
- ),
- ):
- with ExcelWriter(
- f, engine="openpyxl", mode="a", engine_kwargs={"apple_banana": "fruit"}
- ) as writer:
- # ExcelWriter needs us to write something to close properly
- DataFrame(["good"]).to_excel(writer, sheet_name="Sheet2")
- @pytest.mark.parametrize("data_only, expected", [(True, 0), (False, "=1+1")])
- def test_engine_kwargs_append_data_only(ext, data_only, expected):
- # GH 43445
- # tests whether the data_only engine_kwarg actually works well for
- # openpyxl's load_workbook
- with tm.ensure_clean(ext) as f:
- DataFrame(["=1+1"]).to_excel(f)
- with ExcelWriter(
- f, engine="openpyxl", mode="a", engine_kwargs={"data_only": data_only}
- ) as writer:
- assert writer.sheets["Sheet1"]["B2"].value == expected
- # ExcelWriter needs us to writer something to close properly?
- DataFrame().to_excel(writer, sheet_name="Sheet2")
- @pytest.mark.parametrize(
- "mode,expected", [("w", ["baz"]), ("a", ["foo", "bar", "baz"])]
- )
- def test_write_append_mode(ext, mode, expected):
- df = DataFrame([1], columns=["baz"])
- with tm.ensure_clean(ext) as f:
- wb = openpyxl.Workbook()
- wb.worksheets[0].title = "foo"
- wb.worksheets[0]["A1"].value = "foo"
- wb.create_sheet("bar")
- wb.worksheets[1]["A1"].value = "bar"
- wb.save(f)
- with ExcelWriter(f, engine="openpyxl", mode=mode) as writer:
- df.to_excel(writer, sheet_name="baz", index=False)
- with contextlib.closing(openpyxl.load_workbook(f)) as wb2:
- result = [sheet.title for sheet in wb2.worksheets]
- assert result == expected
- for index, cell_value in enumerate(expected):
- assert wb2.worksheets[index]["A1"].value == cell_value
- @pytest.mark.parametrize(
- "if_sheet_exists,num_sheets,expected",
- [
- ("new", 2, ["apple", "banana"]),
- ("replace", 1, ["pear"]),
- ("overlay", 1, ["pear", "banana"]),
- ],
- )
- def test_if_sheet_exists_append_modes(ext, if_sheet_exists, num_sheets, expected):
- # GH 40230
- df1 = DataFrame({"fruit": ["apple", "banana"]})
- df2 = DataFrame({"fruit": ["pear"]})
- with tm.ensure_clean(ext) as f:
- df1.to_excel(f, engine="openpyxl", sheet_name="foo", index=False)
- with ExcelWriter(
- f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
- ) as writer:
- df2.to_excel(writer, sheet_name="foo", index=False)
- with contextlib.closing(openpyxl.load_workbook(f)) as wb:
- assert len(wb.sheetnames) == num_sheets
- assert wb.sheetnames[0] == "foo"
- result = pd.read_excel(wb, "foo", engine="openpyxl")
- assert list(result["fruit"]) == expected
- if len(wb.sheetnames) == 2:
- result = pd.read_excel(wb, wb.sheetnames[1], engine="openpyxl")
- tm.assert_frame_equal(result, df2)
- @pytest.mark.parametrize(
- "startrow, startcol, greeting, goodbye",
- [
- (0, 0, ["poop", "world"], ["goodbye", "people"]),
- (0, 1, ["hello", "world"], ["poop", "people"]),
- (1, 0, ["hello", "poop"], ["goodbye", "people"]),
- (1, 1, ["hello", "world"], ["goodbye", "poop"]),
- ],
- )
- def test_append_overlay_startrow_startcol(ext, startrow, startcol, greeting, goodbye):
- df1 = DataFrame({"greeting": ["hello", "world"], "goodbye": ["goodbye", "people"]})
- df2 = DataFrame(["poop"])
- with tm.ensure_clean(ext) as f:
- df1.to_excel(f, engine="openpyxl", sheet_name="poo", index=False)
- with ExcelWriter(
- f, engine="openpyxl", mode="a", if_sheet_exists="overlay"
- ) as writer:
- # use startrow+1 because we don't have a header
- df2.to_excel(
- writer,
- index=False,
- header=False,
- startrow=startrow + 1,
- startcol=startcol,
- sheet_name="poo",
- )
- result = pd.read_excel(f, sheet_name="poo", engine="openpyxl")
- expected = DataFrame({"greeting": greeting, "goodbye": goodbye})
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "if_sheet_exists,msg",
- [
- (
- "invalid",
- "'invalid' is not valid for if_sheet_exists. Valid options "
- "are 'error', 'new', 'replace' and 'overlay'.",
- ),
- (
- "error",
- "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
- ),
- (
- None,
- "Sheet 'foo' already exists and if_sheet_exists is set to 'error'.",
- ),
- ],
- )
- def test_if_sheet_exists_raises(ext, if_sheet_exists, msg):
- # GH 40230
- df = DataFrame({"fruit": ["pear"]})
- with tm.ensure_clean(ext) as f:
- with pytest.raises(ValueError, match=re.escape(msg)):
- df.to_excel(f, "foo", engine="openpyxl")
- with ExcelWriter(
- f, engine="openpyxl", mode="a", if_sheet_exists=if_sheet_exists
- ) as writer:
- df.to_excel(writer, sheet_name="foo")
- def test_to_excel_with_openpyxl_engine(ext):
- # GH 29854
- with tm.ensure_clean(ext) as filename:
- df1 = DataFrame({"A": np.linspace(1, 10, 10)})
- df2 = DataFrame({"B": np.linspace(1, 20, 10)})
- df = pd.concat([df1, df2], axis=1)
- styled = df.style.applymap(
- lambda val: f"color: {'red' if val < 0 else 'black'}"
- ).highlight_max()
- styled.to_excel(filename, engine="openpyxl")
- @pytest.mark.parametrize("read_only", [True, False])
- def test_read_workbook(datapath, ext, read_only):
- # GH 39528
- filename = datapath("io", "data", "excel", "test1" + ext)
- with contextlib.closing(
- openpyxl.load_workbook(filename, read_only=read_only)
- ) as wb:
- result = pd.read_excel(wb, engine="openpyxl")
- expected = pd.read_excel(filename)
- tm.assert_frame_equal(result, expected)
- @pytest.mark.parametrize(
- "header, expected_data",
- [
- (
- 0,
- {
- "Title": [np.nan, "A", 1, 2, 3],
- "Unnamed: 1": [np.nan, "B", 4, 5, 6],
- "Unnamed: 2": [np.nan, "C", 7, 8, 9],
- },
- ),
- (2, {"A": [1, 2, 3], "B": [4, 5, 6], "C": [7, 8, 9]}),
- ],
- )
- @pytest.mark.parametrize(
- "filename", ["dimension_missing", "dimension_small", "dimension_large"]
- )
- # When read_only is None, use read_excel instead of a workbook
- @pytest.mark.parametrize("read_only", [True, False, None])
- def test_read_with_bad_dimension(
- datapath, ext, header, expected_data, filename, read_only
- ):
- # GH 38956, 39001 - no/incorrect dimension information
- path = datapath("io", "data", "excel", f"{filename}{ext}")
- if read_only is None:
- result = pd.read_excel(path, header=header)
- else:
- with contextlib.closing(
- openpyxl.load_workbook(path, read_only=read_only)
- ) as wb:
- result = pd.read_excel(wb, engine="openpyxl", header=header)
- expected = DataFrame(expected_data)
- tm.assert_frame_equal(result, expected)
- def test_append_mode_file(ext):
- # GH 39576
- df = DataFrame()
- with tm.ensure_clean(ext) as f:
- df.to_excel(f, engine="openpyxl")
- with ExcelWriter(
- f, mode="a", engine="openpyxl", if_sheet_exists="new"
- ) as writer:
- df.to_excel(writer)
- # make sure that zip files are not concatenated by making sure that
- # "docProps/app.xml" only occurs twice in the file
- data = Path(f).read_bytes()
- first = data.find(b"docProps/app.xml")
- second = data.find(b"docProps/app.xml", first + 1)
- third = data.find(b"docProps/app.xml", second + 1)
- assert second != -1 and third == -1
- # When read_only is None, use read_excel instead of a workbook
- @pytest.mark.parametrize("read_only", [True, False, None])
- def test_read_with_empty_trailing_rows(datapath, ext, read_only):
- # GH 39181
- path = datapath("io", "data", "excel", f"empty_trailing_rows{ext}")
- if read_only is None:
- result = pd.read_excel(path)
- else:
- with contextlib.closing(
- openpyxl.load_workbook(path, read_only=read_only)
- ) as wb:
- result = pd.read_excel(wb, engine="openpyxl")
- expected = DataFrame(
- {
- "Title": [np.nan, "A", 1, 2, 3],
- "Unnamed: 1": [np.nan, "B", 4, 5, 6],
- "Unnamed: 2": [np.nan, "C", 7, 8, 9],
- }
- )
- tm.assert_frame_equal(result, expected)
- # When read_only is None, use read_excel instead of a workbook
- @pytest.mark.parametrize("read_only", [True, False, None])
- def test_read_empty_with_blank_row(datapath, ext, read_only):
- # GH 39547 - empty excel file with a row that has no data
- path = datapath("io", "data", "excel", f"empty_with_blank_row{ext}")
- if read_only is None:
- result = pd.read_excel(path)
- else:
- with contextlib.closing(
- openpyxl.load_workbook(path, read_only=read_only)
- ) as wb:
- result = pd.read_excel(wb, engine="openpyxl")
- expected = DataFrame()
- tm.assert_frame_equal(result, expected)
- def test_book_and_sheets_consistent(ext):
- # GH#45687 - Ensure sheets is updated if user modifies book
- with tm.ensure_clean(ext) as f:
- with ExcelWriter(f, engine="openpyxl") as writer:
- assert writer.sheets == {}
- sheet = writer.book.create_sheet("test_name", 0)
- assert writer.sheets == {"test_name": sheet}
- def test_ints_spelled_with_decimals(datapath, ext):
- # GH 46988 - openpyxl returns this sheet with floats
- path = datapath("io", "data", "excel", f"ints_spelled_with_decimals{ext}")
- result = pd.read_excel(path)
- expected = DataFrame(range(2, 12), columns=[1])
- tm.assert_frame_equal(result, expected)
- def test_read_multiindex_header_no_index_names(datapath, ext):
- # GH#47487
- path = datapath("io", "data", "excel", f"multiindex_no_index_names{ext}")
- result = pd.read_excel(path, index_col=[0, 1, 2], header=[0, 1, 2])
- expected = DataFrame(
- [[np.nan, "x", "x", "x"], ["x", np.nan, np.nan, np.nan]],
- columns=pd.MultiIndex.from_tuples(
- [("X", "Y", "A1"), ("X", "Y", "A2"), ("XX", "YY", "B1"), ("XX", "YY", "B2")]
- ),
- index=pd.MultiIndex.from_tuples([("A", "AA", "AAA"), ("A", "BB", "BBB")]),
- )
- tm.assert_frame_equal(result, expected)
|