test_writers.py 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334
  1. from datetime import (
  2. date,
  3. datetime,
  4. timedelta,
  5. )
  6. from functools import partial
  7. from io import BytesIO
  8. import os
  9. import re
  10. import numpy as np
  11. import pytest
  12. import pandas.util._test_decorators as td
  13. import pandas as pd
  14. from pandas import (
  15. DataFrame,
  16. Index,
  17. MultiIndex,
  18. option_context,
  19. )
  20. import pandas._testing as tm
  21. from pandas.io.excel import (
  22. ExcelFile,
  23. ExcelWriter,
  24. _OpenpyxlWriter,
  25. _XlsxWriter,
  26. register_writer,
  27. )
  28. from pandas.io.excel._util import _writers
  29. @pytest.fixture
  30. def path(ext):
  31. """
  32. Fixture to open file for use in each test case.
  33. """
  34. with tm.ensure_clean(ext) as file_path:
  35. yield file_path
  36. @pytest.fixture
  37. def set_engine(engine, ext):
  38. """
  39. Fixture to set engine for use in each test case.
  40. Rather than requiring `engine=...` to be provided explicitly as an
  41. argument in each test, this fixture sets a global option to dictate
  42. which engine should be used to write Excel files. After executing
  43. the test it rolls back said change to the global option.
  44. """
  45. option_name = f"io.excel.{ext.strip('.')}.writer"
  46. with option_context(option_name, engine):
  47. yield
  48. @pytest.mark.parametrize(
  49. "ext",
  50. [
  51. pytest.param(".xlsx", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
  52. pytest.param(".xlsm", marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")]),
  53. pytest.param(
  54. ".xlsx", marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")]
  55. ),
  56. pytest.param(".ods", marks=td.skip_if_no("odf")),
  57. ],
  58. )
  59. class TestRoundTrip:
  60. @pytest.mark.parametrize(
  61. "header,expected",
  62. [(None, DataFrame([np.nan] * 4)), (0, DataFrame({"Unnamed: 0": [np.nan] * 3}))],
  63. )
  64. def test_read_one_empty_col_no_header(self, ext, header, expected):
  65. # xref gh-12292
  66. filename = "no_header"
  67. df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]])
  68. with tm.ensure_clean(ext) as path:
  69. df.to_excel(path, filename, index=False, header=False)
  70. result = pd.read_excel(
  71. path, sheet_name=filename, usecols=[0], header=header
  72. )
  73. tm.assert_frame_equal(result, expected)
  74. @pytest.mark.parametrize(
  75. "header,expected",
  76. [(None, DataFrame([0] + [np.nan] * 4)), (0, DataFrame([np.nan] * 4))],
  77. )
  78. def test_read_one_empty_col_with_header(self, ext, header, expected):
  79. filename = "with_header"
  80. df = DataFrame([["", 1, 100], ["", 2, 200], ["", 3, 300], ["", 4, 400]])
  81. with tm.ensure_clean(ext) as path:
  82. df.to_excel(path, "with_header", index=False, header=True)
  83. result = pd.read_excel(
  84. path, sheet_name=filename, usecols=[0], header=header
  85. )
  86. tm.assert_frame_equal(result, expected)
  87. def test_set_column_names_in_parameter(self, ext):
  88. # GH 12870 : pass down column names associated with
  89. # keyword argument names
  90. refdf = DataFrame([[1, "foo"], [2, "bar"], [3, "baz"]], columns=["a", "b"])
  91. with tm.ensure_clean(ext) as pth:
  92. with ExcelWriter(pth) as writer:
  93. refdf.to_excel(writer, "Data_no_head", header=False, index=False)
  94. refdf.to_excel(writer, "Data_with_head", index=False)
  95. refdf.columns = ["A", "B"]
  96. with ExcelFile(pth) as reader:
  97. xlsdf_no_head = pd.read_excel(
  98. reader, sheet_name="Data_no_head", header=None, names=["A", "B"]
  99. )
  100. xlsdf_with_head = pd.read_excel(
  101. reader,
  102. sheet_name="Data_with_head",
  103. index_col=None,
  104. names=["A", "B"],
  105. )
  106. tm.assert_frame_equal(xlsdf_no_head, refdf)
  107. tm.assert_frame_equal(xlsdf_with_head, refdf)
  108. def test_creating_and_reading_multiple_sheets(self, ext):
  109. # see gh-9450
  110. #
  111. # Test reading multiple sheets, from a runtime
  112. # created Excel file with multiple sheets.
  113. def tdf(col_sheet_name):
  114. d, i = [11, 22, 33], [1, 2, 3]
  115. return DataFrame(d, i, columns=[col_sheet_name])
  116. sheets = ["AAA", "BBB", "CCC"]
  117. dfs = [tdf(s) for s in sheets]
  118. dfs = dict(zip(sheets, dfs))
  119. with tm.ensure_clean(ext) as pth:
  120. with ExcelWriter(pth) as ew:
  121. for sheetname, df in dfs.items():
  122. df.to_excel(ew, sheetname)
  123. dfs_returned = pd.read_excel(pth, sheet_name=sheets, index_col=0)
  124. for s in sheets:
  125. tm.assert_frame_equal(dfs[s], dfs_returned[s])
  126. def test_read_excel_multiindex_empty_level(self, ext):
  127. # see gh-12453
  128. with tm.ensure_clean(ext) as path:
  129. df = DataFrame(
  130. {
  131. ("One", "x"): {0: 1},
  132. ("Two", "X"): {0: 3},
  133. ("Two", "Y"): {0: 7},
  134. ("Zero", ""): {0: 0},
  135. }
  136. )
  137. expected = DataFrame(
  138. {
  139. ("One", "x"): {0: 1},
  140. ("Two", "X"): {0: 3},
  141. ("Two", "Y"): {0: 7},
  142. ("Zero", "Unnamed: 4_level_1"): {0: 0},
  143. }
  144. )
  145. df.to_excel(path)
  146. actual = pd.read_excel(path, header=[0, 1], index_col=0)
  147. tm.assert_frame_equal(actual, expected)
  148. df = DataFrame(
  149. {
  150. ("Beg", ""): {0: 0},
  151. ("Middle", "x"): {0: 1},
  152. ("Tail", "X"): {0: 3},
  153. ("Tail", "Y"): {0: 7},
  154. }
  155. )
  156. expected = DataFrame(
  157. {
  158. ("Beg", "Unnamed: 1_level_1"): {0: 0},
  159. ("Middle", "x"): {0: 1},
  160. ("Tail", "X"): {0: 3},
  161. ("Tail", "Y"): {0: 7},
  162. }
  163. )
  164. df.to_excel(path)
  165. actual = pd.read_excel(path, header=[0, 1], index_col=0)
  166. tm.assert_frame_equal(actual, expected)
  167. @pytest.mark.parametrize("c_idx_names", [True, False])
  168. @pytest.mark.parametrize("r_idx_names", [True, False])
  169. @pytest.mark.parametrize("c_idx_levels", [1, 3])
  170. @pytest.mark.parametrize("r_idx_levels", [1, 3])
  171. def test_excel_multindex_roundtrip(
  172. self, ext, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels, request
  173. ):
  174. # see gh-4679
  175. with tm.ensure_clean(ext) as pth:
  176. if (c_idx_levels == 1 and c_idx_names) and not (
  177. r_idx_levels == 3 and not r_idx_names
  178. ):
  179. mark = pytest.mark.xfail(
  180. reason="Column index name cannot be serialized unless "
  181. "it's a MultiIndex"
  182. )
  183. request.node.add_marker(mark)
  184. # Empty name case current read in as
  185. # unnamed levels, not Nones.
  186. check_names = r_idx_names or r_idx_levels <= 1
  187. df = tm.makeCustomDataframe(
  188. 5, 5, c_idx_names, r_idx_names, c_idx_levels, r_idx_levels
  189. )
  190. df.to_excel(pth)
  191. act = pd.read_excel(
  192. pth,
  193. index_col=list(range(r_idx_levels)),
  194. header=list(range(c_idx_levels)),
  195. )
  196. tm.assert_frame_equal(df, act, check_names=check_names)
  197. df.iloc[0, :] = np.nan
  198. df.to_excel(pth)
  199. act = pd.read_excel(
  200. pth,
  201. index_col=list(range(r_idx_levels)),
  202. header=list(range(c_idx_levels)),
  203. )
  204. tm.assert_frame_equal(df, act, check_names=check_names)
  205. df.iloc[-1, :] = np.nan
  206. df.to_excel(pth)
  207. act = pd.read_excel(
  208. pth,
  209. index_col=list(range(r_idx_levels)),
  210. header=list(range(c_idx_levels)),
  211. )
  212. tm.assert_frame_equal(df, act, check_names=check_names)
  213. def test_read_excel_parse_dates(self, ext):
  214. # see gh-11544, gh-12051
  215. df = DataFrame(
  216. {"col": [1, 2, 3], "date_strings": pd.date_range("2012-01-01", periods=3)}
  217. )
  218. df2 = df.copy()
  219. df2["date_strings"] = df2["date_strings"].dt.strftime("%m/%d/%Y")
  220. with tm.ensure_clean(ext) as pth:
  221. df2.to_excel(pth)
  222. res = pd.read_excel(pth, index_col=0)
  223. tm.assert_frame_equal(df2, res)
  224. res = pd.read_excel(pth, parse_dates=["date_strings"], index_col=0)
  225. tm.assert_frame_equal(df, res)
  226. date_parser = lambda x: datetime.strptime(x, "%m/%d/%Y")
  227. with tm.assert_produces_warning(
  228. FutureWarning, match="use 'date_format' instead"
  229. ):
  230. res = pd.read_excel(
  231. pth,
  232. parse_dates=["date_strings"],
  233. date_parser=date_parser,
  234. index_col=0,
  235. )
  236. tm.assert_frame_equal(df, res)
  237. res = pd.read_excel(
  238. pth, parse_dates=["date_strings"], date_format="%m/%d/%Y", index_col=0
  239. )
  240. tm.assert_frame_equal(df, res)
  241. def test_multiindex_interval_datetimes(self, ext):
  242. # GH 30986
  243. midx = MultiIndex.from_arrays(
  244. [
  245. range(4),
  246. pd.interval_range(
  247. start=pd.Timestamp("2020-01-01"), periods=4, freq="6M"
  248. ),
  249. ]
  250. )
  251. df = DataFrame(range(4), index=midx)
  252. with tm.ensure_clean(ext) as pth:
  253. df.to_excel(pth)
  254. result = pd.read_excel(pth, index_col=[0, 1])
  255. expected = DataFrame(
  256. range(4),
  257. MultiIndex.from_arrays(
  258. [
  259. range(4),
  260. [
  261. "(2020-01-31, 2020-07-31]",
  262. "(2020-07-31, 2021-01-31]",
  263. "(2021-01-31, 2021-07-31]",
  264. "(2021-07-31, 2022-01-31]",
  265. ],
  266. ]
  267. ),
  268. )
  269. tm.assert_frame_equal(result, expected)
  270. @pytest.mark.parametrize(
  271. "engine,ext",
  272. [
  273. pytest.param(
  274. "openpyxl",
  275. ".xlsx",
  276. marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
  277. ),
  278. pytest.param(
  279. "openpyxl",
  280. ".xlsm",
  281. marks=[td.skip_if_no("openpyxl"), td.skip_if_no("xlrd")],
  282. ),
  283. pytest.param(
  284. "xlsxwriter",
  285. ".xlsx",
  286. marks=[td.skip_if_no("xlsxwriter"), td.skip_if_no("xlrd")],
  287. ),
  288. pytest.param("odf", ".ods", marks=td.skip_if_no("odf")),
  289. ],
  290. )
  291. @pytest.mark.usefixtures("set_engine")
  292. class TestExcelWriter:
  293. def test_excel_sheet_size(self, path):
  294. # GH 26080
  295. breaking_row_count = 2**20 + 1
  296. breaking_col_count = 2**14 + 1
  297. # purposely using two arrays to prevent memory issues while testing
  298. row_arr = np.zeros(shape=(breaking_row_count, 1))
  299. col_arr = np.zeros(shape=(1, breaking_col_count))
  300. row_df = DataFrame(row_arr)
  301. col_df = DataFrame(col_arr)
  302. msg = "sheet is too large"
  303. with pytest.raises(ValueError, match=msg):
  304. row_df.to_excel(path)
  305. with pytest.raises(ValueError, match=msg):
  306. col_df.to_excel(path)
  307. def test_excel_sheet_by_name_raise(self, path):
  308. gt = DataFrame(np.random.randn(10, 2))
  309. gt.to_excel(path)
  310. with ExcelFile(path) as xl:
  311. df = pd.read_excel(xl, sheet_name=0, index_col=0)
  312. tm.assert_frame_equal(gt, df)
  313. msg = "Worksheet named '0' not found"
  314. with pytest.raises(ValueError, match=msg):
  315. pd.read_excel(xl, "0")
  316. def test_excel_writer_context_manager(self, frame, path):
  317. with ExcelWriter(path) as writer:
  318. frame.to_excel(writer, "Data1")
  319. frame2 = frame.copy()
  320. frame2.columns = frame.columns[::-1]
  321. frame2.to_excel(writer, "Data2")
  322. with ExcelFile(path) as reader:
  323. found_df = pd.read_excel(reader, sheet_name="Data1", index_col=0)
  324. found_df2 = pd.read_excel(reader, sheet_name="Data2", index_col=0)
  325. tm.assert_frame_equal(found_df, frame)
  326. tm.assert_frame_equal(found_df2, frame2)
  327. def test_roundtrip(self, frame, path):
  328. frame = frame.copy()
  329. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  330. frame.to_excel(path, "test1")
  331. frame.to_excel(path, "test1", columns=["A", "B"])
  332. frame.to_excel(path, "test1", header=False)
  333. frame.to_excel(path, "test1", index=False)
  334. # test roundtrip
  335. frame.to_excel(path, "test1")
  336. recons = pd.read_excel(path, sheet_name="test1", index_col=0)
  337. tm.assert_frame_equal(frame, recons)
  338. frame.to_excel(path, "test1", index=False)
  339. recons = pd.read_excel(path, sheet_name="test1", index_col=None)
  340. recons.index = frame.index
  341. tm.assert_frame_equal(frame, recons)
  342. frame.to_excel(path, "test1", na_rep="NA")
  343. recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["NA"])
  344. tm.assert_frame_equal(frame, recons)
  345. # GH 3611
  346. frame.to_excel(path, "test1", na_rep="88")
  347. recons = pd.read_excel(path, sheet_name="test1", index_col=0, na_values=["88"])
  348. tm.assert_frame_equal(frame, recons)
  349. frame.to_excel(path, "test1", na_rep="88")
  350. recons = pd.read_excel(
  351. path, sheet_name="test1", index_col=0, na_values=[88, 88.0]
  352. )
  353. tm.assert_frame_equal(frame, recons)
  354. # GH 6573
  355. frame.to_excel(path, "Sheet1")
  356. recons = pd.read_excel(path, index_col=0)
  357. tm.assert_frame_equal(frame, recons)
  358. frame.to_excel(path, "0")
  359. recons = pd.read_excel(path, index_col=0)
  360. tm.assert_frame_equal(frame, recons)
  361. # GH 8825 Pandas Series should provide to_excel method
  362. s = frame["A"]
  363. s.to_excel(path)
  364. recons = pd.read_excel(path, index_col=0)
  365. tm.assert_frame_equal(s.to_frame(), recons)
  366. def test_mixed(self, frame, path):
  367. mixed_frame = frame.copy()
  368. mixed_frame["foo"] = "bar"
  369. mixed_frame.to_excel(path, "test1")
  370. with ExcelFile(path) as reader:
  371. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  372. tm.assert_frame_equal(mixed_frame, recons)
  373. def test_ts_frame(self, tsframe, path):
  374. df = tsframe
  375. # freq doesn't round-trip
  376. index = pd.DatetimeIndex(np.asarray(df.index), freq=None)
  377. df.index = index
  378. df.to_excel(path, "test1")
  379. with ExcelFile(path) as reader:
  380. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  381. tm.assert_frame_equal(df, recons)
  382. def test_basics_with_nan(self, frame, path):
  383. frame = frame.copy()
  384. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  385. frame.to_excel(path, "test1")
  386. frame.to_excel(path, "test1", columns=["A", "B"])
  387. frame.to_excel(path, "test1", header=False)
  388. frame.to_excel(path, "test1", index=False)
  389. @pytest.mark.parametrize("np_type", [np.int8, np.int16, np.int32, np.int64])
  390. def test_int_types(self, np_type, path):
  391. # Test np.int values read come back as int
  392. # (rather than float which is Excel's format).
  393. df = DataFrame(np.random.randint(-10, 10, size=(10, 2)), dtype=np_type)
  394. df.to_excel(path, "test1")
  395. with ExcelFile(path) as reader:
  396. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  397. int_frame = df.astype(np.int64)
  398. tm.assert_frame_equal(int_frame, recons)
  399. recons2 = pd.read_excel(path, sheet_name="test1", index_col=0)
  400. tm.assert_frame_equal(int_frame, recons2)
  401. @pytest.mark.parametrize("np_type", [np.float16, np.float32, np.float64])
  402. def test_float_types(self, np_type, path):
  403. # Test np.float values read come back as float.
  404. df = DataFrame(np.random.random_sample(10), dtype=np_type)
  405. df.to_excel(path, "test1")
  406. with ExcelFile(path) as reader:
  407. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  408. np_type
  409. )
  410. tm.assert_frame_equal(df, recons)
  411. def test_bool_types(self, path):
  412. # Test np.bool_ values read come back as float.
  413. df = DataFrame([1, 0, True, False], dtype=np.bool_)
  414. df.to_excel(path, "test1")
  415. with ExcelFile(path) as reader:
  416. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  417. np.bool_
  418. )
  419. tm.assert_frame_equal(df, recons)
  420. def test_inf_roundtrip(self, path):
  421. df = DataFrame([(1, np.inf), (2, 3), (5, -np.inf)])
  422. df.to_excel(path, "test1")
  423. with ExcelFile(path) as reader:
  424. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  425. tm.assert_frame_equal(df, recons)
  426. def test_sheets(self, frame, tsframe, path):
  427. # freq doesn't round-trip
  428. index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
  429. tsframe.index = index
  430. frame = frame.copy()
  431. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  432. frame.to_excel(path, "test1")
  433. frame.to_excel(path, "test1", columns=["A", "B"])
  434. frame.to_excel(path, "test1", header=False)
  435. frame.to_excel(path, "test1", index=False)
  436. # Test writing to separate sheets
  437. with ExcelWriter(path) as writer:
  438. frame.to_excel(writer, "test1")
  439. tsframe.to_excel(writer, "test2")
  440. with ExcelFile(path) as reader:
  441. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  442. tm.assert_frame_equal(frame, recons)
  443. recons = pd.read_excel(reader, sheet_name="test2", index_col=0)
  444. tm.assert_frame_equal(tsframe, recons)
  445. assert 2 == len(reader.sheet_names)
  446. assert "test1" == reader.sheet_names[0]
  447. assert "test2" == reader.sheet_names[1]
  448. def test_colaliases(self, frame, path):
  449. frame = frame.copy()
  450. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  451. frame.to_excel(path, "test1")
  452. frame.to_excel(path, "test1", columns=["A", "B"])
  453. frame.to_excel(path, "test1", header=False)
  454. frame.to_excel(path, "test1", index=False)
  455. # column aliases
  456. col_aliases = Index(["AA", "X", "Y", "Z"])
  457. frame.to_excel(path, "test1", header=col_aliases)
  458. with ExcelFile(path) as reader:
  459. rs = pd.read_excel(reader, sheet_name="test1", index_col=0)
  460. xp = frame.copy()
  461. xp.columns = col_aliases
  462. tm.assert_frame_equal(xp, rs)
  463. def test_roundtrip_indexlabels(self, merge_cells, frame, path):
  464. frame = frame.copy()
  465. frame.iloc[:5, frame.columns.get_loc("A")] = np.nan
  466. frame.to_excel(path, "test1")
  467. frame.to_excel(path, "test1", columns=["A", "B"])
  468. frame.to_excel(path, "test1", header=False)
  469. frame.to_excel(path, "test1", index=False)
  470. # test index_label
  471. df = DataFrame(np.random.randn(10, 2)) >= 0
  472. df.to_excel(path, "test1", index_label=["test"], merge_cells=merge_cells)
  473. with ExcelFile(path) as reader:
  474. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  475. np.int64
  476. )
  477. df.index.names = ["test"]
  478. assert df.index.names == recons.index.names
  479. df = DataFrame(np.random.randn(10, 2)) >= 0
  480. df.to_excel(
  481. path,
  482. "test1",
  483. index_label=["test", "dummy", "dummy2"],
  484. merge_cells=merge_cells,
  485. )
  486. with ExcelFile(path) as reader:
  487. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  488. np.int64
  489. )
  490. df.index.names = ["test"]
  491. assert df.index.names == recons.index.names
  492. df = DataFrame(np.random.randn(10, 2)) >= 0
  493. df.to_excel(path, "test1", index_label="test", merge_cells=merge_cells)
  494. with ExcelFile(path) as reader:
  495. recons = pd.read_excel(reader, sheet_name="test1", index_col=0).astype(
  496. np.int64
  497. )
  498. df.index.names = ["test"]
  499. tm.assert_frame_equal(df, recons.astype(bool))
  500. frame.to_excel(
  501. path,
  502. "test1",
  503. columns=["A", "B", "C", "D"],
  504. index=False,
  505. merge_cells=merge_cells,
  506. )
  507. # take 'A' and 'B' as indexes (same row as cols 'C', 'D')
  508. df = frame.copy()
  509. df = df.set_index(["A", "B"])
  510. with ExcelFile(path) as reader:
  511. recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  512. tm.assert_frame_equal(df, recons)
  513. def test_excel_roundtrip_indexname(self, merge_cells, path):
  514. df = DataFrame(np.random.randn(10, 4))
  515. df.index.name = "foo"
  516. df.to_excel(path, merge_cells=merge_cells)
  517. with ExcelFile(path) as xf:
  518. result = pd.read_excel(xf, sheet_name=xf.sheet_names[0], index_col=0)
  519. tm.assert_frame_equal(result, df)
  520. assert result.index.name == "foo"
  521. def test_excel_roundtrip_datetime(self, merge_cells, tsframe, path):
  522. # datetime.date, not sure what to test here exactly
  523. # freq does not round-trip
  524. index = pd.DatetimeIndex(np.asarray(tsframe.index), freq=None)
  525. tsframe.index = index
  526. tsf = tsframe.copy()
  527. tsf.index = [x.date() for x in tsframe.index]
  528. tsf.to_excel(path, "test1", merge_cells=merge_cells)
  529. with ExcelFile(path) as reader:
  530. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  531. tm.assert_frame_equal(tsframe, recons)
  532. def test_excel_date_datetime_format(self, ext, path):
  533. # see gh-4133
  534. #
  535. # Excel output format strings
  536. df = DataFrame(
  537. [
  538. [date(2014, 1, 31), date(1999, 9, 24)],
  539. [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
  540. ],
  541. index=["DATE", "DATETIME"],
  542. columns=["X", "Y"],
  543. )
  544. df_expected = DataFrame(
  545. [
  546. [datetime(2014, 1, 31), datetime(1999, 9, 24)],
  547. [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
  548. ],
  549. index=["DATE", "DATETIME"],
  550. columns=["X", "Y"],
  551. )
  552. with tm.ensure_clean(ext) as filename2:
  553. with ExcelWriter(path) as writer1:
  554. df.to_excel(writer1, "test1")
  555. with ExcelWriter(
  556. filename2,
  557. date_format="DD.MM.YYYY",
  558. datetime_format="DD.MM.YYYY HH-MM-SS",
  559. ) as writer2:
  560. df.to_excel(writer2, "test1")
  561. with ExcelFile(path) as reader1:
  562. rs1 = pd.read_excel(reader1, sheet_name="test1", index_col=0)
  563. with ExcelFile(filename2) as reader2:
  564. rs2 = pd.read_excel(reader2, sheet_name="test1", index_col=0)
  565. tm.assert_frame_equal(rs1, rs2)
  566. # Since the reader returns a datetime object for dates,
  567. # we need to use df_expected to check the result.
  568. tm.assert_frame_equal(rs2, df_expected)
  569. def test_to_excel_interval_no_labels(self, path):
  570. # see gh-19242
  571. #
  572. # Test writing Interval without labels.
  573. df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64)
  574. expected = df.copy()
  575. df["new"] = pd.cut(df[0], 10)
  576. expected["new"] = pd.cut(expected[0], 10).astype(str)
  577. df.to_excel(path, "test1")
  578. with ExcelFile(path) as reader:
  579. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  580. tm.assert_frame_equal(expected, recons)
  581. def test_to_excel_interval_labels(self, path):
  582. # see gh-19242
  583. #
  584. # Test writing Interval with labels.
  585. df = DataFrame(np.random.randint(-10, 10, size=(20, 1)), dtype=np.int64)
  586. expected = df.copy()
  587. intervals = pd.cut(
  588. df[0], 10, labels=["A", "B", "C", "D", "E", "F", "G", "H", "I", "J"]
  589. )
  590. df["new"] = intervals
  591. expected["new"] = pd.Series(list(intervals))
  592. df.to_excel(path, "test1")
  593. with ExcelFile(path) as reader:
  594. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  595. tm.assert_frame_equal(expected, recons)
  596. def test_to_excel_timedelta(self, path):
  597. # see gh-19242, gh-9155
  598. #
  599. # Test writing timedelta to xls.
  600. df = DataFrame(
  601. np.random.randint(-10, 10, size=(20, 1)), columns=["A"], dtype=np.int64
  602. )
  603. expected = df.copy()
  604. df["new"] = df["A"].apply(lambda x: timedelta(seconds=x))
  605. expected["new"] = expected["A"].apply(
  606. lambda x: timedelta(seconds=x).total_seconds() / 86400
  607. )
  608. df.to_excel(path, "test1")
  609. with ExcelFile(path) as reader:
  610. recons = pd.read_excel(reader, sheet_name="test1", index_col=0)
  611. tm.assert_frame_equal(expected, recons)
  612. def test_to_excel_periodindex(self, tsframe, path):
  613. xp = tsframe.resample("M", kind="period").mean()
  614. xp.to_excel(path, "sht1")
  615. with ExcelFile(path) as reader:
  616. rs = pd.read_excel(reader, sheet_name="sht1", index_col=0)
  617. tm.assert_frame_equal(xp, rs.to_period("M"))
  618. def test_to_excel_multiindex(self, merge_cells, frame, path):
  619. arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
  620. new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
  621. frame.index = new_index
  622. frame.to_excel(path, "test1", header=False)
  623. frame.to_excel(path, "test1", columns=["A", "B"])
  624. # round trip
  625. frame.to_excel(path, "test1", merge_cells=merge_cells)
  626. with ExcelFile(path) as reader:
  627. df = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  628. tm.assert_frame_equal(frame, df)
  629. # GH13511
  630. def test_to_excel_multiindex_nan_label(self, merge_cells, path):
  631. df = DataFrame({"A": [None, 2, 3], "B": [10, 20, 30], "C": np.random.sample(3)})
  632. df = df.set_index(["A", "B"])
  633. df.to_excel(path, merge_cells=merge_cells)
  634. df1 = pd.read_excel(path, index_col=[0, 1])
  635. tm.assert_frame_equal(df, df1)
  636. # Test for Issue 11328. If column indices are integers, make
  637. # sure they are handled correctly for either setting of
  638. # merge_cells
  639. def test_to_excel_multiindex_cols(self, merge_cells, frame, path):
  640. arrays = np.arange(len(frame.index) * 2, dtype=np.int64).reshape(2, -1)
  641. new_index = MultiIndex.from_arrays(arrays, names=["first", "second"])
  642. frame.index = new_index
  643. new_cols_index = MultiIndex.from_tuples([(40, 1), (40, 2), (50, 1), (50, 2)])
  644. frame.columns = new_cols_index
  645. header = [0, 1]
  646. if not merge_cells:
  647. header = 0
  648. # round trip
  649. frame.to_excel(path, "test1", merge_cells=merge_cells)
  650. with ExcelFile(path) as reader:
  651. df = pd.read_excel(
  652. reader, sheet_name="test1", header=header, index_col=[0, 1]
  653. )
  654. if not merge_cells:
  655. fm = frame.columns.format(sparsify=False, adjoin=False, names=False)
  656. frame.columns = [".".join(map(str, q)) for q in zip(*fm)]
  657. tm.assert_frame_equal(frame, df)
  658. def test_to_excel_multiindex_dates(self, merge_cells, tsframe, path):
  659. # try multiindex with dates
  660. new_index = [tsframe.index, np.arange(len(tsframe.index), dtype=np.int64)]
  661. tsframe.index = MultiIndex.from_arrays(new_index)
  662. tsframe.index.names = ["time", "foo"]
  663. tsframe.to_excel(path, "test1", merge_cells=merge_cells)
  664. with ExcelFile(path) as reader:
  665. recons = pd.read_excel(reader, sheet_name="test1", index_col=[0, 1])
  666. tm.assert_frame_equal(tsframe, recons)
  667. assert recons.index.names == ("time", "foo")
  668. def test_to_excel_multiindex_no_write_index(self, path):
  669. # Test writing and re-reading a MI without the index. GH 5616.
  670. # Initial non-MI frame.
  671. frame1 = DataFrame({"a": [10, 20], "b": [30, 40], "c": [50, 60]})
  672. # Add a MI.
  673. frame2 = frame1.copy()
  674. multi_index = MultiIndex.from_tuples([(70, 80), (90, 100)])
  675. frame2.index = multi_index
  676. # Write out to Excel without the index.
  677. frame2.to_excel(path, "test1", index=False)
  678. # Read it back in.
  679. with ExcelFile(path) as reader:
  680. frame3 = pd.read_excel(reader, sheet_name="test1")
  681. # Test that it is the same as the initial frame.
  682. tm.assert_frame_equal(frame1, frame3)
  683. def test_to_excel_empty_multiindex(self, path):
  684. # GH 19543.
  685. expected = DataFrame([], columns=[0, 1, 2])
  686. df = DataFrame([], index=MultiIndex.from_tuples([], names=[0, 1]), columns=[2])
  687. df.to_excel(path, "test1")
  688. with ExcelFile(path) as reader:
  689. result = pd.read_excel(reader, sheet_name="test1")
  690. tm.assert_frame_equal(
  691. result, expected, check_index_type=False, check_dtype=False
  692. )
  693. def test_to_excel_float_format(self, path):
  694. df = DataFrame(
  695. [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
  696. index=["A", "B"],
  697. columns=["X", "Y", "Z"],
  698. )
  699. df.to_excel(path, "test1", float_format="%.2f")
  700. with ExcelFile(path) as reader:
  701. result = pd.read_excel(reader, sheet_name="test1", index_col=0)
  702. expected = DataFrame(
  703. [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
  704. index=["A", "B"],
  705. columns=["X", "Y", "Z"],
  706. )
  707. tm.assert_frame_equal(result, expected)
  708. def test_to_excel_output_encoding(self, ext):
  709. # Avoid mixed inferred_type.
  710. df = DataFrame(
  711. [["\u0192", "\u0193", "\u0194"], ["\u0195", "\u0196", "\u0197"]],
  712. index=["A\u0192", "B"],
  713. columns=["X\u0193", "Y", "Z"],
  714. )
  715. with tm.ensure_clean("__tmp_to_excel_float_format__." + ext) as filename:
  716. df.to_excel(filename, sheet_name="TestSheet")
  717. result = pd.read_excel(filename, sheet_name="TestSheet", index_col=0)
  718. tm.assert_frame_equal(result, df)
  719. def test_to_excel_unicode_filename(self, ext):
  720. with tm.ensure_clean("\u0192u." + ext) as filename:
  721. try:
  722. with open(filename, "wb"):
  723. pass
  724. except UnicodeEncodeError:
  725. pytest.skip("No unicode file names on this system")
  726. df = DataFrame(
  727. [[0.123456, 0.234567, 0.567567], [12.32112, 123123.2, 321321.2]],
  728. index=["A", "B"],
  729. columns=["X", "Y", "Z"],
  730. )
  731. df.to_excel(filename, "test1", float_format="%.2f")
  732. with ExcelFile(filename) as reader:
  733. result = pd.read_excel(reader, sheet_name="test1", index_col=0)
  734. expected = DataFrame(
  735. [[0.12, 0.23, 0.57], [12.32, 123123.20, 321321.20]],
  736. index=["A", "B"],
  737. columns=["X", "Y", "Z"],
  738. )
  739. tm.assert_frame_equal(result, expected)
  740. @pytest.mark.parametrize("use_headers", [True, False])
  741. @pytest.mark.parametrize("r_idx_nlevels", [1, 2, 3])
  742. @pytest.mark.parametrize("c_idx_nlevels", [1, 2, 3])
  743. def test_excel_010_hemstring(
  744. self, merge_cells, c_idx_nlevels, r_idx_nlevels, use_headers, path
  745. ):
  746. def roundtrip(data, header=True, parser_hdr=0, index=True):
  747. data.to_excel(path, header=header, merge_cells=merge_cells, index=index)
  748. with ExcelFile(path) as xf:
  749. return pd.read_excel(
  750. xf, sheet_name=xf.sheet_names[0], header=parser_hdr
  751. )
  752. # Basic test.
  753. parser_header = 0 if use_headers else None
  754. res = roundtrip(DataFrame([0]), use_headers, parser_header)
  755. assert res.shape == (1, 2)
  756. assert res.iloc[0, 0] is not np.nan
  757. # More complex tests with multi-index.
  758. nrows = 5
  759. ncols = 3
  760. # ensure limited functionality in 0.10
  761. # override of gh-2370 until sorted out in 0.11
  762. df = tm.makeCustomDataframe(
  763. nrows, ncols, r_idx_nlevels=r_idx_nlevels, c_idx_nlevels=c_idx_nlevels
  764. )
  765. # This if will be removed once multi-column Excel writing
  766. # is implemented. For now fixing gh-9794.
  767. if c_idx_nlevels > 1:
  768. msg = (
  769. "Writing to Excel with MultiIndex columns and no index "
  770. "\\('index'=False\\) is not yet implemented."
  771. )
  772. with pytest.raises(NotImplementedError, match=msg):
  773. roundtrip(df, use_headers, index=False)
  774. else:
  775. res = roundtrip(df, use_headers)
  776. if use_headers:
  777. assert res.shape == (nrows, ncols + r_idx_nlevels)
  778. else:
  779. # First row taken as columns.
  780. assert res.shape == (nrows - 1, ncols + r_idx_nlevels)
  781. # No NaNs.
  782. for r in range(len(res.index)):
  783. for c in range(len(res.columns)):
  784. assert res.iloc[r, c] is not np.nan
  785. def test_duplicated_columns(self, path):
  786. # see gh-5235
  787. df = DataFrame([[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B"])
  788. df.to_excel(path, "test1")
  789. expected = DataFrame(
  790. [[1, 2, 3], [1, 2, 3], [1, 2, 3]], columns=["A", "B", "B.1"]
  791. )
  792. # By default, we mangle.
  793. result = pd.read_excel(path, sheet_name="test1", index_col=0)
  794. tm.assert_frame_equal(result, expected)
  795. # see gh-11007, gh-10970
  796. df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A", "B"])
  797. df.to_excel(path, "test1")
  798. result = pd.read_excel(path, sheet_name="test1", index_col=0)
  799. expected = DataFrame(
  800. [[1, 2, 3, 4], [5, 6, 7, 8]], columns=["A", "B", "A.1", "B.1"]
  801. )
  802. tm.assert_frame_equal(result, expected)
  803. # see gh-10982
  804. df.to_excel(path, "test1", index=False, header=False)
  805. result = pd.read_excel(path, sheet_name="test1", header=None)
  806. expected = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8]])
  807. tm.assert_frame_equal(result, expected)
  808. def test_swapped_columns(self, path):
  809. # Test for issue #5427.
  810. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})
  811. write_frame.to_excel(path, "test1", columns=["B", "A"])
  812. read_frame = pd.read_excel(path, sheet_name="test1", header=0)
  813. tm.assert_series_equal(write_frame["A"], read_frame["A"])
  814. tm.assert_series_equal(write_frame["B"], read_frame["B"])
  815. def test_invalid_columns(self, path):
  816. # see gh-10982
  817. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2]})
  818. with pytest.raises(KeyError, match="Not all names specified"):
  819. write_frame.to_excel(path, "test1", columns=["B", "C"])
  820. with pytest.raises(
  821. KeyError, match="'passes columns are not ALL present dataframe'"
  822. ):
  823. write_frame.to_excel(path, "test1", columns=["C", "D"])
  824. @pytest.mark.parametrize(
  825. "to_excel_index,read_excel_index_col",
  826. [
  827. (True, 0), # Include index in write to file
  828. (False, None), # Dont include index in write to file
  829. ],
  830. )
  831. def test_write_subset_columns(self, path, to_excel_index, read_excel_index_col):
  832. # GH 31677
  833. write_frame = DataFrame({"A": [1, 1, 1], "B": [2, 2, 2], "C": [3, 3, 3]})
  834. write_frame.to_excel(
  835. path, "col_subset_bug", columns=["A", "B"], index=to_excel_index
  836. )
  837. expected = write_frame[["A", "B"]]
  838. read_frame = pd.read_excel(
  839. path, sheet_name="col_subset_bug", index_col=read_excel_index_col
  840. )
  841. tm.assert_frame_equal(expected, read_frame)
  842. def test_comment_arg(self, path):
  843. # see gh-18735
  844. #
  845. # Test the comment argument functionality to pd.read_excel.
  846. # Create file to read in.
  847. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  848. df.to_excel(path, "test_c")
  849. # Read file without comment arg.
  850. result1 = pd.read_excel(path, sheet_name="test_c", index_col=0)
  851. result1.iloc[1, 0] = None
  852. result1.iloc[1, 1] = None
  853. result1.iloc[2, 1] = None
  854. result2 = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0)
  855. tm.assert_frame_equal(result1, result2)
  856. def test_comment_default(self, path):
  857. # Re issue #18735
  858. # Test the comment argument default to pd.read_excel
  859. # Create file to read in
  860. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  861. df.to_excel(path, "test_c")
  862. # Read file with default and explicit comment=None
  863. result1 = pd.read_excel(path, sheet_name="test_c")
  864. result2 = pd.read_excel(path, sheet_name="test_c", comment=None)
  865. tm.assert_frame_equal(result1, result2)
  866. def test_comment_used(self, path):
  867. # see gh-18735
  868. #
  869. # Test the comment argument is working as expected when used.
  870. # Create file to read in.
  871. df = DataFrame({"A": ["one", "#one", "one"], "B": ["two", "two", "#two"]})
  872. df.to_excel(path, "test_c")
  873. # Test read_frame_comment against manually produced expected output.
  874. expected = DataFrame({"A": ["one", None, "one"], "B": ["two", None, None]})
  875. result = pd.read_excel(path, sheet_name="test_c", comment="#", index_col=0)
  876. tm.assert_frame_equal(result, expected)
  877. def test_comment_empty_line(self, path):
  878. # Re issue #18735
  879. # Test that pd.read_excel ignores commented lines at the end of file
  880. df = DataFrame({"a": ["1", "#2"], "b": ["2", "3"]})
  881. df.to_excel(path, index=False)
  882. # Test that all-comment lines at EoF are ignored
  883. expected = DataFrame({"a": [1], "b": [2]})
  884. result = pd.read_excel(path, comment="#")
  885. tm.assert_frame_equal(result, expected)
  886. def test_datetimes(self, path):
  887. # Test writing and reading datetimes. For issue #9139. (xref #9185)
  888. datetimes = [
  889. datetime(2013, 1, 13, 1, 2, 3),
  890. datetime(2013, 1, 13, 2, 45, 56),
  891. datetime(2013, 1, 13, 4, 29, 49),
  892. datetime(2013, 1, 13, 6, 13, 42),
  893. datetime(2013, 1, 13, 7, 57, 35),
  894. datetime(2013, 1, 13, 9, 41, 28),
  895. datetime(2013, 1, 13, 11, 25, 21),
  896. datetime(2013, 1, 13, 13, 9, 14),
  897. datetime(2013, 1, 13, 14, 53, 7),
  898. datetime(2013, 1, 13, 16, 37, 0),
  899. datetime(2013, 1, 13, 18, 20, 52),
  900. ]
  901. write_frame = DataFrame({"A": datetimes})
  902. write_frame.to_excel(path, "Sheet1")
  903. read_frame = pd.read_excel(path, sheet_name="Sheet1", header=0)
  904. tm.assert_series_equal(write_frame["A"], read_frame["A"])
  905. def test_bytes_io(self, engine):
  906. # see gh-7074
  907. with BytesIO() as bio:
  908. df = DataFrame(np.random.randn(10, 2))
  909. # Pass engine explicitly, as there is no file path to infer from.
  910. with ExcelWriter(bio, engine=engine) as writer:
  911. df.to_excel(writer)
  912. bio.seek(0)
  913. reread_df = pd.read_excel(bio, index_col=0)
  914. tm.assert_frame_equal(df, reread_df)
  915. def test_write_lists_dict(self, path):
  916. # see gh-8188.
  917. df = DataFrame(
  918. {
  919. "mixed": ["a", ["b", "c"], {"d": "e", "f": 2}],
  920. "numeric": [1, 2, 3.0],
  921. "str": ["apple", "banana", "cherry"],
  922. }
  923. )
  924. df.to_excel(path, "Sheet1")
  925. read = pd.read_excel(path, sheet_name="Sheet1", header=0, index_col=0)
  926. expected = df.copy()
  927. expected.mixed = expected.mixed.apply(str)
  928. expected.numeric = expected.numeric.astype("int64")
  929. tm.assert_frame_equal(read, expected)
  930. def test_render_as_column_name(self, path):
  931. # see gh-34331
  932. df = DataFrame({"render": [1, 2], "data": [3, 4]})
  933. df.to_excel(path, "Sheet1")
  934. read = pd.read_excel(path, "Sheet1", index_col=0)
  935. expected = df
  936. tm.assert_frame_equal(read, expected)
  937. def test_true_and_false_value_options(self, path):
  938. # see gh-13347
  939. df = DataFrame([["foo", "bar"]], columns=["col1", "col2"])
  940. expected = df.replace({"foo": True, "bar": False})
  941. df.to_excel(path)
  942. read_frame = pd.read_excel(
  943. path, true_values=["foo"], false_values=["bar"], index_col=0
  944. )
  945. tm.assert_frame_equal(read_frame, expected)
  946. def test_freeze_panes(self, path):
  947. # see gh-15160
  948. expected = DataFrame([[1, 2], [3, 4]], columns=["col1", "col2"])
  949. expected.to_excel(path, "Sheet1", freeze_panes=(1, 1))
  950. result = pd.read_excel(path, index_col=0)
  951. tm.assert_frame_equal(result, expected)
  952. def test_path_path_lib(self, engine, ext):
  953. df = tm.makeDataFrame()
  954. writer = partial(df.to_excel, engine=engine)
  955. reader = partial(pd.read_excel, index_col=0)
  956. result = tm.round_trip_pathlib(writer, reader, path=f"foo{ext}")
  957. tm.assert_frame_equal(result, df)
  958. def test_path_local_path(self, engine, ext):
  959. df = tm.makeDataFrame()
  960. writer = partial(df.to_excel, engine=engine)
  961. reader = partial(pd.read_excel, index_col=0)
  962. result = tm.round_trip_localpath(writer, reader, path=f"foo{ext}")
  963. tm.assert_frame_equal(result, df)
  964. def test_merged_cell_custom_objects(self, path):
  965. # see GH-27006
  966. mi = MultiIndex.from_tuples(
  967. [
  968. (pd.Period("2018"), pd.Period("2018Q1")),
  969. (pd.Period("2018"), pd.Period("2018Q2")),
  970. ]
  971. )
  972. expected = DataFrame(np.ones((2, 2), dtype="int64"), columns=mi)
  973. expected.to_excel(path)
  974. result = pd.read_excel(path, header=[0, 1], index_col=0)
  975. # need to convert PeriodIndexes to standard Indexes for assert equal
  976. expected.columns = expected.columns.set_levels(
  977. [[str(i) for i in mi.levels[0]], [str(i) for i in mi.levels[1]]],
  978. level=[0, 1],
  979. )
  980. tm.assert_frame_equal(result, expected)
  981. @pytest.mark.parametrize("dtype", [None, object])
  982. def test_raise_when_saving_timezones(self, dtype, tz_aware_fixture, path):
  983. # GH 27008, GH 7056
  984. tz = tz_aware_fixture
  985. data = pd.Timestamp("2019", tz=tz)
  986. df = DataFrame([data], dtype=dtype)
  987. with pytest.raises(ValueError, match="Excel does not support"):
  988. df.to_excel(path)
  989. data = data.to_pydatetime()
  990. df = DataFrame([data], dtype=dtype)
  991. with pytest.raises(ValueError, match="Excel does not support"):
  992. df.to_excel(path)
  993. def test_excel_duplicate_columns_with_names(self, path):
  994. # GH#39695
  995. df = DataFrame({"A": [0, 1], "B": [10, 11]})
  996. df.to_excel(path, columns=["A", "B", "A"], index=False)
  997. result = pd.read_excel(path)
  998. expected = DataFrame([[0, 10, 0], [1, 11, 1]], columns=["A", "B", "A.1"])
  999. tm.assert_frame_equal(result, expected)
  1000. def test_if_sheet_exists_raises(self, ext):
  1001. # GH 40230
  1002. msg = "if_sheet_exists is only valid in append mode (mode='a')"
  1003. with tm.ensure_clean(ext) as f:
  1004. with pytest.raises(ValueError, match=re.escape(msg)):
  1005. ExcelWriter(f, if_sheet_exists="replace")
  1006. def test_excel_writer_empty_frame(self, engine, ext):
  1007. # GH#45793
  1008. with tm.ensure_clean(ext) as path:
  1009. with ExcelWriter(path, engine=engine) as writer:
  1010. DataFrame().to_excel(writer)
  1011. result = pd.read_excel(path)
  1012. expected = DataFrame()
  1013. tm.assert_frame_equal(result, expected)
  1014. def test_to_excel_empty_frame(self, engine, ext):
  1015. # GH#45793
  1016. with tm.ensure_clean(ext) as path:
  1017. DataFrame().to_excel(path, engine=engine)
  1018. result = pd.read_excel(path)
  1019. expected = DataFrame()
  1020. tm.assert_frame_equal(result, expected)
  1021. class TestExcelWriterEngineTests:
  1022. @pytest.mark.parametrize(
  1023. "klass,ext",
  1024. [
  1025. pytest.param(_XlsxWriter, ".xlsx", marks=td.skip_if_no("xlsxwriter")),
  1026. pytest.param(_OpenpyxlWriter, ".xlsx", marks=td.skip_if_no("openpyxl")),
  1027. ],
  1028. )
  1029. def test_ExcelWriter_dispatch(self, klass, ext):
  1030. with tm.ensure_clean(ext) as path:
  1031. with ExcelWriter(path) as writer:
  1032. if ext == ".xlsx" and td.safe_import("xlsxwriter"):
  1033. # xlsxwriter has preference over openpyxl if both installed
  1034. assert isinstance(writer, _XlsxWriter)
  1035. else:
  1036. assert isinstance(writer, klass)
  1037. def test_ExcelWriter_dispatch_raises(self):
  1038. with pytest.raises(ValueError, match="No engine"):
  1039. ExcelWriter("nothing")
  1040. def test_register_writer(self):
  1041. class DummyClass(ExcelWriter):
  1042. called_save = False
  1043. called_write_cells = False
  1044. called_sheets = False
  1045. _supported_extensions = ("xlsx", "xls")
  1046. _engine = "dummy"
  1047. def book(self):
  1048. pass
  1049. def _save(self):
  1050. type(self).called_save = True
  1051. def _write_cells(self, *args, **kwargs):
  1052. type(self).called_write_cells = True
  1053. @property
  1054. def sheets(self):
  1055. type(self).called_sheets = True
  1056. @classmethod
  1057. def assert_called_and_reset(cls):
  1058. assert cls.called_save
  1059. assert cls.called_write_cells
  1060. assert not cls.called_sheets
  1061. cls.called_save = False
  1062. cls.called_write_cells = False
  1063. register_writer(DummyClass)
  1064. with option_context("io.excel.xlsx.writer", "dummy"):
  1065. path = "something.xlsx"
  1066. with tm.ensure_clean(path) as filepath:
  1067. with ExcelWriter(filepath) as writer:
  1068. assert isinstance(writer, DummyClass)
  1069. df = tm.makeCustomDataframe(1, 1)
  1070. df.to_excel(filepath)
  1071. DummyClass.assert_called_and_reset()
  1072. with tm.ensure_clean("something.xls") as filepath:
  1073. df.to_excel(filepath, engine="dummy")
  1074. DummyClass.assert_called_and_reset()
  1075. @td.skip_if_no("xlrd")
  1076. @td.skip_if_no("openpyxl")
  1077. class TestFSPath:
  1078. def test_excelfile_fspath(self):
  1079. with tm.ensure_clean("foo.xlsx") as path:
  1080. df = DataFrame({"A": [1, 2]})
  1081. df.to_excel(path)
  1082. with ExcelFile(path) as xl:
  1083. result = os.fspath(xl)
  1084. assert result == path
  1085. def test_excelwriter_fspath(self):
  1086. with tm.ensure_clean("foo.xlsx") as path:
  1087. with ExcelWriter(path) as writer:
  1088. assert os.fspath(writer) == str(path)
  1089. @pytest.mark.parametrize("klass", _writers.values())
  1090. def test_subclass_attr(klass):
  1091. # testing that subclasses of ExcelWriter don't have public attributes (issue 49602)
  1092. attrs_base = {name for name in dir(ExcelWriter) if not name.startswith("_")}
  1093. attrs_klass = {name for name in dir(klass) if not name.startswith("_")}
  1094. assert not attrs_base.symmetric_difference(attrs_klass)