test_to_html.py 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898
  1. from datetime import datetime
  2. from io import StringIO
  3. import re
  4. import numpy as np
  5. import pytest
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. Index,
  10. MultiIndex,
  11. option_context,
  12. )
  13. import pandas._testing as tm
  14. import pandas.io.formats.format as fmt
# Long filler sentence built from adjacent string literals. It is used in
# this module as an oversized cell value when checking how the
# ``display.max_colwidth`` option affects HTML output.
lorem_ipsum = (
    "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod "
    "tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim "
    "veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex "
    "ea commodo consequat. Duis aute irure dolor in reprehenderit in "
    "voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur "
    "sint occaecat cupidatat non proident, sunt in culpa qui officia "
    "deserunt mollit anim id est laborum."
)
  24. def expected_html(datapath, name):
  25. """
  26. Read HTML file from formats data directory.
  27. Parameters
  28. ----------
  29. datapath : pytest fixture
  30. The datapath fixture injected into a test by pytest.
  31. name : str
  32. The name of the HTML file without the suffix.
  33. Returns
  34. -------
  35. str : contents of HTML file.
  36. """
  37. filename = ".".join([name, "html"])
  38. filepath = datapath("io", "formats", "data", "html", filename)
  39. with open(filepath, encoding="utf-8") as f:
  40. html = f.read()
  41. return html.rstrip()
  42. @pytest.fixture(params=["mixed", "empty"])
  43. def biggie_df_fixture(request):
  44. """Fixture for a big mixed Dataframe and an empty Dataframe"""
  45. if request.param == "mixed":
  46. df = DataFrame(
  47. {"A": np.random.randn(200), "B": tm.makeStringIndex(200)},
  48. index=np.arange(200),
  49. )
  50. df.loc[:20, "A"] = np.nan
  51. df.loc[:20, "B"] = np.nan
  52. return df
  53. elif request.param == "empty":
  54. df = DataFrame(index=np.arange(200))
  55. return df
  56. @pytest.fixture(params=fmt._VALID_JUSTIFY_PARAMETERS)
  57. def justify(request):
  58. return request.param
  59. @pytest.mark.parametrize("col_space", [30, 50])
  60. def test_to_html_with_col_space(col_space):
  61. df = DataFrame(np.random.random(size=(1, 3)))
  62. # check that col_space affects HTML generation
  63. # and be very brittle about it.
  64. result = df.to_html(col_space=col_space)
  65. hdrs = [x for x in result.split(r"\n") if re.search(r"<th[>\s]", x)]
  66. assert len(hdrs) > 0
  67. for h in hdrs:
  68. assert "min-width" in h
  69. assert str(col_space) in h
  70. def test_to_html_with_column_specific_col_space_raises():
  71. df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
  72. msg = (
  73. "Col_space length\\(\\d+\\) should match "
  74. "DataFrame number of columns\\(\\d+\\)"
  75. )
  76. with pytest.raises(ValueError, match=msg):
  77. df.to_html(col_space=[30, 40])
  78. with pytest.raises(ValueError, match=msg):
  79. df.to_html(col_space=[30, 40, 50, 60])
  80. msg = "unknown column"
  81. with pytest.raises(ValueError, match=msg):
  82. df.to_html(col_space={"a": "foo", "b": 23, "d": 34})
  83. def test_to_html_with_column_specific_col_space():
  84. df = DataFrame(np.random.random(size=(3, 3)), columns=["a", "b", "c"])
  85. result = df.to_html(col_space={"a": "2em", "b": 23})
  86. hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
  87. assert 'min-width: 2em;">a</th>' in hdrs[1]
  88. assert 'min-width: 23px;">b</th>' in hdrs[2]
  89. assert "<th>c</th>" in hdrs[3]
  90. result = df.to_html(col_space=["1em", 2, 3])
  91. hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
  92. assert 'min-width: 1em;">a</th>' in hdrs[1]
  93. assert 'min-width: 2px;">b</th>' in hdrs[2]
  94. assert 'min-width: 3px;">c</th>' in hdrs[3]
  95. def test_to_html_with_empty_string_label():
  96. # GH 3547, to_html regards empty string labels as repeated labels
  97. data = {"c1": ["a", "b"], "c2": ["a", ""], "data": [1, 2]}
  98. df = DataFrame(data).set_index(["c1", "c2"])
  99. result = df.to_html()
  100. assert "rowspan" not in result
  101. @pytest.mark.parametrize(
  102. "df,expected",
  103. [
  104. (DataFrame({"\u03c3": np.arange(10.0)}), "unicode_1"),
  105. (DataFrame({"A": ["\u03c3"]}), "unicode_2"),
  106. ],
  107. )
  108. def test_to_html_unicode(df, expected, datapath):
  109. expected = expected_html(datapath, expected)
  110. result = df.to_html()
  111. assert result == expected
  112. def test_to_html_encoding(float_frame, tmp_path):
  113. # GH 28663
  114. path = tmp_path / "test.html"
  115. float_frame.to_html(path, encoding="gbk")
  116. with open(str(path), encoding="gbk") as f:
  117. assert float_frame.to_html() == f.read()
  118. def test_to_html_decimal(datapath):
  119. # GH 12031
  120. df = DataFrame({"A": [6.0, 3.1, 2.2]})
  121. result = df.to_html(decimal=",")
  122. expected = expected_html(datapath, "gh12031_expected_output")
  123. assert result == expected
  124. @pytest.mark.parametrize(
  125. "kwargs,string,expected",
  126. [
  127. ({}, "<type 'str'>", "escaped"),
  128. ({"escape": False}, "<b>bold</b>", "escape_disabled"),
  129. ],
  130. )
  131. def test_to_html_escaped(kwargs, string, expected, datapath):
  132. a = "str<ing1 &amp;"
  133. b = "stri>ng2 &amp;"
  134. test_dict = {"co<l1": {a: string, b: string}, "co>l2": {a: string, b: string}}
  135. result = DataFrame(test_dict).to_html(**kwargs)
  136. expected = expected_html(datapath, expected)
  137. assert result == expected
  138. @pytest.mark.parametrize("index_is_named", [True, False])
  139. def test_to_html_multiindex_index_false(index_is_named, datapath):
  140. # GH 8452
  141. df = DataFrame(
  142. {"a": range(2), "b": range(3, 5), "c": range(5, 7), "d": range(3, 5)}
  143. )
  144. df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
  145. if index_is_named:
  146. df.index = Index(df.index.values, name="idx")
  147. result = df.to_html(index=False)
  148. expected = expected_html(datapath, "gh8452_expected_output")
  149. assert result == expected
  150. @pytest.mark.parametrize(
  151. "multi_sparse,expected",
  152. [
  153. (False, "multiindex_sparsify_false_multi_sparse_1"),
  154. (False, "multiindex_sparsify_false_multi_sparse_2"),
  155. (True, "multiindex_sparsify_1"),
  156. (True, "multiindex_sparsify_2"),
  157. ],
  158. )
  159. def test_to_html_multiindex_sparsify(multi_sparse, expected, datapath):
  160. index = MultiIndex.from_arrays([[0, 0, 1, 1], [0, 1, 0, 1]], names=["foo", None])
  161. df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], index=index)
  162. if expected.endswith("2"):
  163. df.columns = index[::2]
  164. with option_context("display.multi_sparse", multi_sparse):
  165. result = df.to_html()
  166. expected = expected_html(datapath, expected)
  167. assert result == expected
  168. @pytest.mark.parametrize(
  169. "max_rows,expected",
  170. [
  171. (60, "gh14882_expected_output_1"),
  172. # Test that ... appears in a middle level
  173. (56, "gh14882_expected_output_2"),
  174. ],
  175. )
  176. def test_to_html_multiindex_odd_even_truncate(max_rows, expected, datapath):
  177. # GH 14882 - Issue on truncation with odd length DataFrame
  178. index = MultiIndex.from_product(
  179. [[100, 200, 300], [10, 20, 30], [1, 2, 3, 4, 5, 6, 7]], names=["a", "b", "c"]
  180. )
  181. df = DataFrame({"n": range(len(index))}, index=index)
  182. result = df.to_html(max_rows=max_rows)
  183. expected = expected_html(datapath, expected)
  184. assert result == expected
  185. @pytest.mark.parametrize(
  186. "df,formatters,expected",
  187. [
  188. (
  189. DataFrame(
  190. [[0, 1], [2, 3], [4, 5], [6, 7]],
  191. columns=["foo", None],
  192. index=np.arange(4),
  193. ),
  194. {"__index__": lambda x: "abcd"[x]},
  195. "index_formatter",
  196. ),
  197. (
  198. DataFrame({"months": [datetime(2016, 1, 1), datetime(2016, 2, 2)]}),
  199. {"months": lambda x: x.strftime("%Y-%m")},
  200. "datetime64_monthformatter",
  201. ),
  202. (
  203. DataFrame(
  204. {
  205. "hod": pd.to_datetime(
  206. ["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f"
  207. )
  208. }
  209. ),
  210. {"hod": lambda x: x.strftime("%H:%M")},
  211. "datetime64_hourformatter",
  212. ),
  213. (
  214. DataFrame(
  215. {
  216. "i": pd.Series([1, 2], dtype="int64"),
  217. "f": pd.Series([1, 2], dtype="float64"),
  218. "I": pd.Series([1, 2], dtype="Int64"),
  219. "s": pd.Series([1, 2], dtype="string"),
  220. "b": pd.Series([True, False], dtype="boolean"),
  221. "c": pd.Series(["a", "b"], dtype=pd.CategoricalDtype(["a", "b"])),
  222. "o": pd.Series([1, "2"], dtype=object),
  223. }
  224. ),
  225. [lambda x: "formatted"] * 7,
  226. "various_dtypes_formatted",
  227. ),
  228. ],
  229. )
  230. def test_to_html_formatters(df, formatters, expected, datapath):
  231. expected = expected_html(datapath, expected)
  232. result = df.to_html(formatters=formatters)
  233. assert result == expected
  234. def test_to_html_regression_GH6098():
  235. df = DataFrame(
  236. {
  237. "clé1": ["a", "a", "b", "b", "a"],
  238. "clé2": ["1er", "2ème", "1er", "2ème", "1er"],
  239. "données1": np.random.randn(5),
  240. "données2": np.random.randn(5),
  241. }
  242. )
  243. # it works
  244. df.pivot_table(index=["clé1"], columns=["clé2"])._repr_html_()
  245. def test_to_html_truncate(datapath):
  246. index = pd.date_range(start="20010101", freq="D", periods=20)
  247. df = DataFrame(index=index, columns=range(20))
  248. result = df.to_html(max_rows=8, max_cols=4)
  249. expected = expected_html(datapath, "truncate")
  250. assert result == expected
  251. @pytest.mark.parametrize("size", [1, 5])
  252. def test_html_invalid_formatters_arg_raises(size):
  253. # issue-28469
  254. df = DataFrame(columns=["a", "b", "c"])
  255. msg = "Formatters length({}) should match DataFrame number of columns(3)"
  256. with pytest.raises(ValueError, match=re.escape(msg.format(size))):
  257. df.to_html(formatters=["{}".format] * size)
  258. def test_to_html_truncate_formatter(datapath):
  259. # issue-25955
  260. data = [
  261. {"A": 1, "B": 2, "C": 3, "D": 4},
  262. {"A": 5, "B": 6, "C": 7, "D": 8},
  263. {"A": 9, "B": 10, "C": 11, "D": 12},
  264. {"A": 13, "B": 14, "C": 15, "D": 16},
  265. ]
  266. df = DataFrame(data)
  267. fmt = lambda x: str(x) + "_mod"
  268. formatters = [fmt, fmt, None, None]
  269. result = df.to_html(formatters=formatters, max_cols=3)
  270. expected = expected_html(datapath, "truncate_formatter")
  271. assert result == expected
  272. @pytest.mark.parametrize(
  273. "sparsify,expected",
  274. [(True, "truncate_multi_index"), (False, "truncate_multi_index_sparse_off")],
  275. )
  276. def test_to_html_truncate_multi_index(sparsify, expected, datapath):
  277. arrays = [
  278. ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
  279. ["one", "two", "one", "two", "one", "two", "one", "two"],
  280. ]
  281. df = DataFrame(index=arrays, columns=arrays)
  282. result = df.to_html(max_rows=7, max_cols=7, sparsify=sparsify)
  283. expected = expected_html(datapath, expected)
  284. assert result == expected
  285. @pytest.mark.parametrize(
  286. "option,result,expected",
  287. [
  288. (None, lambda df: df.to_html(), "1"),
  289. (None, lambda df: df.to_html(border=2), "2"),
  290. (2, lambda df: df.to_html(), "2"),
  291. (2, lambda df: df._repr_html_(), "2"),
  292. ],
  293. )
  294. def test_to_html_border(option, result, expected):
  295. df = DataFrame({"A": [1, 2]})
  296. if option is None:
  297. result = result(df)
  298. else:
  299. with option_context("display.html.border", option):
  300. result = result(df)
  301. expected = f'border="{expected}"'
  302. assert expected in result
  303. @pytest.mark.parametrize("biggie_df_fixture", ["mixed"], indirect=True)
  304. def test_to_html(biggie_df_fixture):
  305. # TODO: split this test
  306. df = biggie_df_fixture
  307. s = df.to_html()
  308. buf = StringIO()
  309. retval = df.to_html(buf=buf)
  310. assert retval is None
  311. assert buf.getvalue() == s
  312. assert isinstance(s, str)
  313. df.to_html(columns=["B", "A"], col_space=17)
  314. df.to_html(columns=["B", "A"], formatters={"A": lambda x: f"{x:.1f}"})
  315. df.to_html(columns=["B", "A"], float_format=str)
  316. df.to_html(columns=["B", "A"], col_space=12, float_format=str)
  317. @pytest.mark.parametrize("biggie_df_fixture", ["empty"], indirect=True)
  318. def test_to_html_empty_dataframe(biggie_df_fixture):
  319. df = biggie_df_fixture
  320. df.to_html()
  321. def test_to_html_filename(biggie_df_fixture, tmpdir):
  322. df = biggie_df_fixture
  323. expected = df.to_html()
  324. path = tmpdir.join("test.html")
  325. df.to_html(path)
  326. result = path.read()
  327. assert result == expected
  328. def test_to_html_with_no_bold():
  329. df = DataFrame({"x": np.random.randn(5)})
  330. html = df.to_html(bold_rows=False)
  331. result = html[html.find("</thead>")]
  332. assert "<strong" not in result
  333. def test_to_html_columns_arg(float_frame):
  334. result = float_frame.to_html(columns=["A"])
  335. assert "<th>B</th>" not in result
  336. @pytest.mark.parametrize(
  337. "columns,justify,expected",
  338. [
  339. (
  340. MultiIndex.from_tuples(
  341. list(zip(np.arange(2).repeat(2), np.mod(range(4), 2))),
  342. names=["CL0", "CL1"],
  343. ),
  344. "left",
  345. "multiindex_1",
  346. ),
  347. (
  348. MultiIndex.from_tuples(list(zip(range(4), np.mod(range(4), 2)))),
  349. "right",
  350. "multiindex_2",
  351. ),
  352. ],
  353. )
  354. def test_to_html_multiindex(columns, justify, expected, datapath):
  355. df = DataFrame([list("abcd"), list("efgh")], columns=columns)
  356. result = df.to_html(justify=justify)
  357. expected = expected_html(datapath, expected)
  358. assert result == expected
  359. def test_to_html_justify(justify, datapath):
  360. df = DataFrame(
  361. {"A": [6, 30000, 2], "B": [1, 2, 70000], "C": [223442, 0, 1]},
  362. columns=["A", "B", "C"],
  363. )
  364. result = df.to_html(justify=justify)
  365. expected = expected_html(datapath, "justify").format(justify=justify)
  366. assert result == expected
  367. @pytest.mark.parametrize(
  368. "justify", ["super-right", "small-left", "noinherit", "tiny", "pandas"]
  369. )
  370. def test_to_html_invalid_justify(justify):
  371. # GH 17527
  372. df = DataFrame()
  373. msg = "Invalid value for justify parameter"
  374. with pytest.raises(ValueError, match=msg):
  375. df.to_html(justify=justify)
  376. class TestHTMLIndex:
  377. @pytest.fixture
  378. def df(self):
  379. index = ["foo", "bar", "baz"]
  380. df = DataFrame(
  381. {"A": [1, 2, 3], "B": [1.2, 3.4, 5.6], "C": ["one", "two", np.nan]},
  382. columns=["A", "B", "C"],
  383. index=index,
  384. )
  385. return df
  386. @pytest.fixture
  387. def expected_without_index(self, datapath):
  388. return expected_html(datapath, "index_2")
  389. def test_to_html_flat_index_without_name(
  390. self, datapath, df, expected_without_index
  391. ):
  392. expected_with_index = expected_html(datapath, "index_1")
  393. assert df.to_html() == expected_with_index
  394. result = df.to_html(index=False)
  395. for i in df.index:
  396. assert i not in result
  397. assert result == expected_without_index
  398. def test_to_html_flat_index_with_name(self, datapath, df, expected_without_index):
  399. df.index = Index(["foo", "bar", "baz"], name="idx")
  400. expected_with_index = expected_html(datapath, "index_3")
  401. assert df.to_html() == expected_with_index
  402. assert df.to_html(index=False) == expected_without_index
  403. def test_to_html_multiindex_without_names(
  404. self, datapath, df, expected_without_index
  405. ):
  406. tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")]
  407. df.index = MultiIndex.from_tuples(tuples)
  408. expected_with_index = expected_html(datapath, "index_4")
  409. assert df.to_html() == expected_with_index
  410. result = df.to_html(index=False)
  411. for i in ["foo", "bar", "car", "bike"]:
  412. assert i not in result
  413. # must be the same result as normal index
  414. assert result == expected_without_index
  415. def test_to_html_multiindex_with_names(self, datapath, df, expected_without_index):
  416. tuples = [("foo", "car"), ("foo", "bike"), ("bar", "car")]
  417. df.index = MultiIndex.from_tuples(tuples, names=["idx1", "idx2"])
  418. expected_with_index = expected_html(datapath, "index_5")
  419. assert df.to_html() == expected_with_index
  420. assert df.to_html(index=False) == expected_without_index
  421. @pytest.mark.parametrize("classes", ["sortable draggable", ["sortable", "draggable"]])
  422. def test_to_html_with_classes(classes, datapath):
  423. df = DataFrame()
  424. expected = expected_html(datapath, "with_classes")
  425. result = df.to_html(classes=classes)
  426. assert result == expected
  427. def test_to_html_no_index_max_rows(datapath):
  428. # GH 14998
  429. df = DataFrame({"A": [1, 2, 3, 4]})
  430. result = df.to_html(index=False, max_rows=1)
  431. expected = expected_html(datapath, "gh14998_expected_output")
  432. assert result == expected
  433. def test_to_html_multiindex_max_cols(datapath):
  434. # GH 6131
  435. index = MultiIndex(
  436. levels=[["ba", "bb", "bc"], ["ca", "cb", "cc"]],
  437. codes=[[0, 1, 2], [0, 1, 2]],
  438. names=["b", "c"],
  439. )
  440. columns = MultiIndex(
  441. levels=[["d"], ["aa", "ab", "ac"]],
  442. codes=[[0, 0, 0], [0, 1, 2]],
  443. names=[None, "a"],
  444. )
  445. data = np.array(
  446. [[1.0, np.nan, np.nan], [np.nan, 2.0, np.nan], [np.nan, np.nan, 3.0]]
  447. )
  448. df = DataFrame(data, index, columns)
  449. result = df.to_html(max_cols=2)
  450. expected = expected_html(datapath, "gh6131_expected_output")
  451. assert result == expected
  452. def test_to_html_multi_indexes_index_false(datapath):
  453. # GH 22579
  454. df = DataFrame(
  455. {"a": range(10), "b": range(10, 20), "c": range(10, 20), "d": range(10, 20)}
  456. )
  457. df.columns = MultiIndex.from_product([["a", "b"], ["c", "d"]])
  458. df.index = MultiIndex.from_product([["a", "b"], ["c", "d", "e", "f", "g"]])
  459. result = df.to_html(index=False)
  460. expected = expected_html(datapath, "gh22579_expected_output")
  461. assert result == expected
  462. @pytest.mark.parametrize("index_names", [True, False])
  463. @pytest.mark.parametrize("header", [True, False])
  464. @pytest.mark.parametrize("index", [True, False])
  465. @pytest.mark.parametrize(
  466. "column_index, column_type",
  467. [
  468. (Index([0, 1]), "unnamed_standard"),
  469. (Index([0, 1], name="columns.name"), "named_standard"),
  470. (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
  471. (
  472. MultiIndex.from_product(
  473. [["a"], ["b", "c"]], names=["columns.name.0", "columns.name.1"]
  474. ),
  475. "named_multi",
  476. ),
  477. ],
  478. )
  479. @pytest.mark.parametrize(
  480. "row_index, row_type",
  481. [
  482. (Index([0, 1]), "unnamed_standard"),
  483. (Index([0, 1], name="index.name"), "named_standard"),
  484. (MultiIndex.from_product([["a"], ["b", "c"]]), "unnamed_multi"),
  485. (
  486. MultiIndex.from_product(
  487. [["a"], ["b", "c"]], names=["index.name.0", "index.name.1"]
  488. ),
  489. "named_multi",
  490. ),
  491. ],
  492. )
  493. def test_to_html_basic_alignment(
  494. datapath, row_index, row_type, column_index, column_type, index, header, index_names
  495. ):
  496. # GH 22747, GH 22579
  497. df = DataFrame(np.zeros((2, 2), dtype=int), index=row_index, columns=column_index)
  498. result = df.to_html(index=index, header=header, index_names=index_names)
  499. if not index:
  500. row_type = "none"
  501. elif not index_names and row_type.startswith("named"):
  502. row_type = "un" + row_type
  503. if not header:
  504. column_type = "none"
  505. elif not index_names and column_type.startswith("named"):
  506. column_type = "un" + column_type
  507. filename = "index_" + row_type + "_columns_" + column_type
  508. expected = expected_html(datapath, filename)
  509. assert result == expected
  510. @pytest.mark.parametrize("index_names", [True, False])
  511. @pytest.mark.parametrize("header", [True, False])
  512. @pytest.mark.parametrize("index", [True, False])
  513. @pytest.mark.parametrize(
  514. "column_index, column_type",
  515. [
  516. (Index(np.arange(8)), "unnamed_standard"),
  517. (Index(np.arange(8), name="columns.name"), "named_standard"),
  518. (
  519. MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
  520. "unnamed_multi",
  521. ),
  522. (
  523. MultiIndex.from_product(
  524. [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
  525. ),
  526. "named_multi",
  527. ),
  528. ],
  529. )
  530. @pytest.mark.parametrize(
  531. "row_index, row_type",
  532. [
  533. (Index(np.arange(8)), "unnamed_standard"),
  534. (Index(np.arange(8), name="index.name"), "named_standard"),
  535. (
  536. MultiIndex.from_product([["a", "b"], ["c", "d"], ["e", "f"]]),
  537. "unnamed_multi",
  538. ),
  539. (
  540. MultiIndex.from_product(
  541. [["a", "b"], ["c", "d"], ["e", "f"]], names=["foo", None, "baz"]
  542. ),
  543. "named_multi",
  544. ),
  545. ],
  546. )
  547. def test_to_html_alignment_with_truncation(
  548. datapath, row_index, row_type, column_index, column_type, index, header, index_names
  549. ):
  550. # GH 22747, GH 22579
  551. df = DataFrame(np.arange(64).reshape(8, 8), index=row_index, columns=column_index)
  552. result = df.to_html(
  553. max_rows=4, max_cols=4, index=index, header=header, index_names=index_names
  554. )
  555. if not index:
  556. row_type = "none"
  557. elif not index_names and row_type.startswith("named"):
  558. row_type = "un" + row_type
  559. if not header:
  560. column_type = "none"
  561. elif not index_names and column_type.startswith("named"):
  562. column_type = "un" + column_type
  563. filename = "trunc_df_index_" + row_type + "_columns_" + column_type
  564. expected = expected_html(datapath, filename)
  565. assert result == expected
  566. @pytest.mark.parametrize("index", [False, 0])
  567. def test_to_html_truncation_index_false_max_rows(datapath, index):
  568. # GH 15019
  569. data = [
  570. [1.764052, 0.400157],
  571. [0.978738, 2.240893],
  572. [1.867558, -0.977278],
  573. [0.950088, -0.151357],
  574. [-0.103219, 0.410599],
  575. ]
  576. df = DataFrame(data)
  577. result = df.to_html(max_rows=4, index=index)
  578. expected = expected_html(datapath, "gh15019_expected_output")
  579. assert result == expected
  580. @pytest.mark.parametrize("index", [False, 0])
  581. @pytest.mark.parametrize(
  582. "col_index_named, expected_output",
  583. [(False, "gh22783_expected_output"), (True, "gh22783_named_columns_index")],
  584. )
  585. def test_to_html_truncation_index_false_max_cols(
  586. datapath, index, col_index_named, expected_output
  587. ):
  588. # GH 22783
  589. data = [
  590. [1.764052, 0.400157, 0.978738, 2.240893, 1.867558],
  591. [-0.977278, 0.950088, -0.151357, -0.103219, 0.410599],
  592. ]
  593. df = DataFrame(data)
  594. if col_index_named:
  595. df.columns.rename("columns.name", inplace=True)
  596. result = df.to_html(max_cols=4, index=index)
  597. expected = expected_html(datapath, expected_output)
  598. assert result == expected
  599. @pytest.mark.parametrize("notebook", [True, False])
  600. def test_to_html_notebook_has_style(notebook):
  601. df = DataFrame({"A": [1, 2, 3]})
  602. result = df.to_html(notebook=notebook)
  603. if notebook:
  604. assert "tbody tr th:only-of-type" in result
  605. assert "vertical-align: middle;" in result
  606. assert "thead th" in result
  607. else:
  608. assert "tbody tr th:only-of-type" not in result
  609. assert "vertical-align: middle;" not in result
  610. assert "thead th" not in result
  611. def test_to_html_with_index_names_false():
  612. # GH 16493
  613. df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
  614. result = df.to_html(index_names=False)
  615. assert "myindexname" not in result
  616. def test_to_html_with_id():
  617. # GH 8496
  618. df = DataFrame({"A": [1, 2]}, index=Index(["a", "b"], name="myindexname"))
  619. result = df.to_html(index_names=False, table_id="TEST_ID")
  620. assert ' id="TEST_ID"' in result
  621. @pytest.mark.parametrize(
  622. "value,float_format,expected",
  623. [
  624. (0.19999, "%.3f", "gh21625_expected_output"),
  625. (100.0, "%.0f", "gh22270_expected_output"),
  626. ],
  627. )
  628. def test_to_html_float_format_no_fixed_width(value, float_format, expected, datapath):
  629. # GH 21625, GH 22270
  630. df = DataFrame({"x": [value]})
  631. expected = expected_html(datapath, expected)
  632. result = df.to_html(float_format=float_format)
  633. assert result == expected
  634. @pytest.mark.parametrize(
  635. "render_links,expected",
  636. [(True, "render_links_true"), (False, "render_links_false")],
  637. )
  638. def test_to_html_render_links(render_links, expected, datapath):
  639. # GH 2679
  640. data = [
  641. [0, "https://pandas.pydata.org/?q1=a&q2=b", "pydata.org"],
  642. [0, "www.pydata.org", "pydata.org"],
  643. ]
  644. df = DataFrame(data, columns=["foo", "bar", None])
  645. result = df.to_html(render_links=render_links)
  646. expected = expected_html(datapath, expected)
  647. assert result == expected
  648. @pytest.mark.parametrize(
  649. "method,expected",
  650. [
  651. ("to_html", lambda x: lorem_ipsum),
  652. ("_repr_html_", lambda x: lorem_ipsum[: x - 4] + "..."), # regression case
  653. ],
  654. )
  655. @pytest.mark.parametrize("max_colwidth", [10, 20, 50, 100])
  656. def test_ignore_display_max_colwidth(method, expected, max_colwidth):
  657. # see gh-17004
  658. df = DataFrame([lorem_ipsum])
  659. with option_context("display.max_colwidth", max_colwidth):
  660. result = getattr(df, method)()
  661. expected = expected(max_colwidth)
  662. assert expected in result
  663. @pytest.mark.parametrize("classes", [True, 0])
  664. def test_to_html_invalid_classes_type(classes):
  665. # GH 25608
  666. df = DataFrame()
  667. msg = "classes must be a string, list, or tuple"
  668. with pytest.raises(TypeError, match=msg):
  669. df.to_html(classes=classes)
  670. def test_to_html_round_column_headers():
  671. # GH 17280
  672. df = DataFrame([1], columns=[0.55555])
  673. with option_context("display.precision", 3):
  674. html = df.to_html(notebook=False)
  675. notebook = df.to_html(notebook=True)
  676. assert "0.55555" in html
  677. assert "0.556" in notebook
  678. @pytest.mark.parametrize("unit", ["100px", "10%", "5em", 150])
  679. def test_to_html_with_col_space_units(unit):
  680. # GH 25941
  681. df = DataFrame(np.random.random(size=(1, 3)))
  682. result = df.to_html(col_space=unit)
  683. result = result.split("tbody")[0]
  684. hdrs = [x for x in result.split("\n") if re.search(r"<th[>\s]", x)]
  685. if isinstance(unit, int):
  686. unit = str(unit) + "px"
  687. for h in hdrs:
  688. expected = f'<th style="min-width: {unit};">'
  689. assert expected in h
  690. def test_html_repr_min_rows_default(datapath):
  691. # gh-27991
  692. # default setting no truncation even if above min_rows
  693. df = DataFrame({"a": range(20)})
  694. result = df._repr_html_()
  695. expected = expected_html(datapath, "html_repr_min_rows_default_no_truncation")
  696. assert result == expected
  697. # default of max_rows 60 triggers truncation if above
  698. df = DataFrame({"a": range(61)})
  699. result = df._repr_html_()
  700. expected = expected_html(datapath, "html_repr_min_rows_default_truncated")
  701. assert result == expected
  702. @pytest.mark.parametrize(
  703. "max_rows,min_rows,expected",
  704. [
  705. # truncated after first two rows
  706. (10, 4, "html_repr_max_rows_10_min_rows_4"),
  707. # when set to None, follow value of max_rows
  708. (12, None, "html_repr_max_rows_12_min_rows_None"),
  709. # when set value higher as max_rows, use the minimum
  710. (10, 12, "html_repr_max_rows_10_min_rows_12"),
  711. # max_rows of None -> never truncate
  712. (None, 12, "html_repr_max_rows_None_min_rows_12"),
  713. ],
  714. )
  715. def test_html_repr_min_rows(datapath, max_rows, min_rows, expected):
  716. # gh-27991
  717. df = DataFrame({"a": range(61)})
  718. expected = expected_html(datapath, expected)
  719. with option_context("display.max_rows", max_rows, "display.min_rows", min_rows):
  720. result = df._repr_html_()
  721. assert result == expected
  722. def test_to_html_multilevel(multiindex_year_month_day_dataframe_random_data):
  723. ymd = multiindex_year_month_day_dataframe_random_data
  724. ymd.columns.name = "foo"
  725. ymd.to_html()
  726. ymd.T.to_html()
  727. @pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
  728. def test_to_html_na_rep_and_float_format(na_rep, datapath):
  729. # https://github.com/pandas-dev/pandas/issues/13828
  730. df = DataFrame(
  731. [
  732. ["A", 1.2225],
  733. ["A", None],
  734. ],
  735. columns=["Group", "Data"],
  736. )
  737. result = df.to_html(na_rep=na_rep, float_format="{:.2f}".format)
  738. expected = expected_html(datapath, "gh13828_expected_output")
  739. expected = expected.format(na_rep=na_rep)
  740. assert result == expected
  741. def test_to_html_na_rep_non_scalar_data(datapath):
  742. # GH47103
  743. df = DataFrame([dict(a=1, b=[1, 2, 3])])
  744. result = df.to_html(na_rep="-")
  745. expected = expected_html(datapath, "gh47103_expected_output")
  746. assert result == expected
  747. def test_to_html_float_format_object_col(datapath):
  748. # GH#40024
  749. df = DataFrame(data={"x": [1000.0, "test"]})
  750. result = df.to_html(float_format=lambda x: f"{x:,.0f}")
  751. expected = expected_html(datapath, "gh40024_expected_output")
  752. assert result == expected