# test_repr_info.py (11 KB)
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. from io import StringIO
  6. import numpy as np
  7. import pytest
  8. from pandas import (
  9. NA,
  10. Categorical,
  11. DataFrame,
  12. MultiIndex,
  13. NaT,
  14. PeriodIndex,
  15. Series,
  16. Timestamp,
  17. date_range,
  18. option_context,
  19. period_range,
  20. )
  21. import pandas._testing as tm
  22. import pandas.io.formats.format as fmt
  23. class TestDataFrameReprInfoEtc:
  24. def test_repr_bytes_61_lines(self):
  25. # GH#12857
  26. lets = list("ACDEFGHIJKLMNOP")
  27. slen = 50
  28. nseqs = 1000
  29. words = [[np.random.choice(lets) for x in range(slen)] for _ in range(nseqs)]
  30. df = DataFrame(words).astype("U1")
  31. assert (df.dtypes == object).all()
  32. # smoke tests; at one point this raised with 61 but not 60
  33. repr(df)
  34. repr(df.iloc[:60, :])
  35. repr(df.iloc[:61, :])
  36. def test_repr_unicode_level_names(self, frame_or_series):
  37. index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=["\u0394", "i1"])
  38. obj = DataFrame(np.random.randn(2, 4), index=index)
  39. obj = tm.get_obj(obj, frame_or_series)
  40. repr(obj)
  41. def test_assign_index_sequences(self):
  42. # GH#2200
  43. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index(
  44. ["a", "b"]
  45. )
  46. index = list(df.index)
  47. index[0] = ("faz", "boo")
  48. df.index = index
  49. repr(df)
  50. # this travels an improper code path
  51. index[0] = ["faz", "boo"]
  52. df.index = index
  53. repr(df)
  54. def test_repr_with_mi_nat(self):
  55. df = DataFrame({"X": [1, 2]}, index=[[NaT, Timestamp("20130101")], ["a", "b"]])
  56. result = repr(df)
  57. expected = " X\nNaT a 1\n2013-01-01 b 2"
  58. assert result == expected
  59. def test_repr_with_different_nulls(self):
  60. # GH45263
  61. df = DataFrame([1, 2, 3, 4], [True, None, np.nan, NaT])
  62. result = repr(df)
  63. expected = """ 0
  64. True 1
  65. None 2
  66. NaN 3
  67. NaT 4"""
  68. assert result == expected
  69. def test_repr_with_different_nulls_cols(self):
  70. # GH45263
  71. d = {np.nan: [1, 2], None: [3, 4], NaT: [6, 7], True: [8, 9]}
  72. df = DataFrame(data=d)
  73. result = repr(df)
  74. expected = """ NaN None NaT True
  75. 0 1 3 6 8
  76. 1 2 4 7 9"""
  77. assert result == expected
  78. def test_multiindex_na_repr(self):
  79. # only an issue with long columns
  80. df3 = DataFrame(
  81. {
  82. "A" * 30: {("A", "A0006000", "nuit"): "A0006000"},
  83. "B" * 30: {("A", "A0006000", "nuit"): np.nan},
  84. "C" * 30: {("A", "A0006000", "nuit"): np.nan},
  85. "D" * 30: {("A", "A0006000", "nuit"): np.nan},
  86. "E" * 30: {("A", "A0006000", "nuit"): "A"},
  87. "F" * 30: {("A", "A0006000", "nuit"): np.nan},
  88. }
  89. )
  90. idf = df3.set_index(["A" * 30, "C" * 30])
  91. repr(idf)
  92. def test_repr_name_coincide(self):
  93. index = MultiIndex.from_tuples(
  94. [("a", 0, "foo"), ("b", 1, "bar")], names=["a", "b", "c"]
  95. )
  96. df = DataFrame({"value": [0, 1]}, index=index)
  97. lines = repr(df).split("\n")
  98. assert lines[2].startswith("a 0 foo")
  99. def test_repr_to_string(
  100. self,
  101. multiindex_year_month_day_dataframe_random_data,
  102. multiindex_dataframe_random_data,
  103. ):
  104. ymd = multiindex_year_month_day_dataframe_random_data
  105. frame = multiindex_dataframe_random_data
  106. repr(frame)
  107. repr(ymd)
  108. repr(frame.T)
  109. repr(ymd.T)
  110. buf = StringIO()
  111. frame.to_string(buf=buf)
  112. ymd.to_string(buf=buf)
  113. frame.T.to_string(buf=buf)
  114. ymd.T.to_string(buf=buf)
  115. def test_repr_empty(self):
  116. # empty
  117. repr(DataFrame())
  118. # empty with index
  119. frame = DataFrame(index=np.arange(1000))
  120. repr(frame)
  121. def test_repr_mixed(self, float_string_frame):
  122. buf = StringIO()
  123. # mixed
  124. repr(float_string_frame)
  125. float_string_frame.info(verbose=False, buf=buf)
  126. @pytest.mark.slow
  127. def test_repr_mixed_big(self):
  128. # big mixed
  129. biggie = DataFrame(
  130. {"A": np.random.randn(200), "B": tm.makeStringIndex(200)}, index=range(200)
  131. )
  132. biggie.loc[:20, "A"] = np.nan
  133. biggie.loc[:20, "B"] = np.nan
  134. repr(biggie)
  135. def test_repr(self, float_frame):
  136. buf = StringIO()
  137. # small one
  138. repr(float_frame)
  139. float_frame.info(verbose=False, buf=buf)
  140. # even smaller
  141. float_frame.reindex(columns=["A"]).info(verbose=False, buf=buf)
  142. float_frame.reindex(columns=["A", "B"]).info(verbose=False, buf=buf)
  143. # exhausting cases in DataFrame.info
  144. # columns but no index
  145. no_index = DataFrame(columns=[0, 1, 3])
  146. repr(no_index)
  147. # no columns or index
  148. DataFrame().info(buf=buf)
  149. df = DataFrame(["a\n\r\tb"], columns=["a\n\r\td"], index=["a\n\r\tf"])
  150. assert "\t" not in repr(df)
  151. assert "\r" not in repr(df)
  152. assert "a\n" not in repr(df)
  153. def test_repr_dimensions(self):
  154. df = DataFrame([[1, 2], [3, 4]])
  155. with option_context("display.show_dimensions", True):
  156. assert "2 rows x 2 columns" in repr(df)
  157. with option_context("display.show_dimensions", False):
  158. assert "2 rows x 2 columns" not in repr(df)
  159. with option_context("display.show_dimensions", "truncate"):
  160. assert "2 rows x 2 columns" not in repr(df)
  161. @pytest.mark.slow
  162. def test_repr_big(self):
  163. # big one
  164. biggie = DataFrame(np.zeros((200, 4)), columns=range(4), index=range(200))
  165. repr(biggie)
  166. def test_repr_unsortable(self, float_frame):
  167. # columns are not sortable
  168. unsortable = DataFrame(
  169. {
  170. "foo": [1] * 50,
  171. datetime.today(): [1] * 50,
  172. "bar": ["bar"] * 50,
  173. datetime.today() + timedelta(1): ["bar"] * 50,
  174. },
  175. index=np.arange(50),
  176. )
  177. repr(unsortable)
  178. fmt.set_option("display.precision", 3)
  179. repr(float_frame)
  180. fmt.set_option("display.max_rows", 10, "display.max_columns", 2)
  181. repr(float_frame)
  182. fmt.set_option("display.max_rows", 1000, "display.max_columns", 1000)
  183. repr(float_frame)
  184. tm.reset_display_options()
  185. def test_repr_unicode(self):
  186. uval = "\u03c3\u03c3\u03c3\u03c3"
  187. df = DataFrame({"A": [uval, uval]})
  188. result = repr(df)
  189. ex_top = " A"
  190. assert result.split("\n")[0].rstrip() == ex_top
  191. df = DataFrame({"A": [uval, uval]})
  192. result = repr(df)
  193. assert result.split("\n")[0].rstrip() == ex_top
  194. def test_unicode_string_with_unicode(self):
  195. df = DataFrame({"A": ["\u05d0"]})
  196. str(df)
  197. def test_repr_unicode_columns(self):
  198. df = DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]})
  199. repr(df.columns) # should not raise UnicodeDecodeError
  200. def test_str_to_bytes_raises(self):
  201. # GH 26447
  202. df = DataFrame({"A": ["abc"]})
  203. msg = "^'str' object cannot be interpreted as an integer$"
  204. with pytest.raises(TypeError, match=msg):
  205. bytes(df)
  206. def test_very_wide_info_repr(self):
  207. df = DataFrame(np.random.randn(10, 20), columns=tm.rands_array(10, 20))
  208. repr(df)
  209. def test_repr_column_name_unicode_truncation_bug(self):
  210. # #1906
  211. df = DataFrame(
  212. {
  213. "Id": [7117434],
  214. "StringCol": (
  215. "Is it possible to modify drop plot code"
  216. "so that the output graph is displayed "
  217. "in iphone simulator, Is it possible to "
  218. "modify drop plot code so that the "
  219. "output graph is \xe2\x80\xa8displayed "
  220. "in iphone simulator.Now we are adding "
  221. "the CSV file externally. I want to Call "
  222. "the File through the code.."
  223. ),
  224. }
  225. )
  226. with option_context("display.max_columns", 20):
  227. assert "StringCol" in repr(df)
  228. def test_latex_repr(self):
  229. pytest.importorskip("jinja2")
  230. expected = r"""\begin{tabular}{llll}
  231. \toprule
  232. & 0 & 1 & 2 \\
  233. \midrule
  234. 0 & $\alpha$ & b & c \\
  235. 1 & 1 & 2 & 3 \\
  236. \bottomrule
  237. \end{tabular}
  238. """
  239. with option_context(
  240. "styler.format.escape", None, "styler.render.repr", "latex"
  241. ):
  242. df = DataFrame([[r"$\alpha$", "b", "c"], [1, 2, 3]])
  243. result = df._repr_latex_()
  244. assert result == expected
  245. # GH 12182
  246. assert df._repr_latex_() is None
  247. def test_repr_categorical_dates_periods(self):
  248. # normal DataFrame
  249. dt = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
  250. p = period_range("2011-01", freq="M", periods=5)
  251. df = DataFrame({"dt": dt, "p": p})
  252. exp = """ dt p
  253. 0 2011-01-01 09:00:00-05:00 2011-01
  254. 1 2011-01-01 10:00:00-05:00 2011-02
  255. 2 2011-01-01 11:00:00-05:00 2011-03
  256. 3 2011-01-01 12:00:00-05:00 2011-04
  257. 4 2011-01-01 13:00:00-05:00 2011-05"""
  258. assert repr(df) == exp
  259. df2 = DataFrame({"dt": Categorical(dt), "p": Categorical(p)})
  260. assert repr(df2) == exp
  261. @pytest.mark.parametrize("arg", [np.datetime64, np.timedelta64])
  262. @pytest.mark.parametrize(
  263. "box, expected",
  264. [[Series, "0 NaT\ndtype: object"], [DataFrame, " 0\n0 NaT"]],
  265. )
  266. def test_repr_np_nat_with_object(self, arg, box, expected):
  267. # GH 25445
  268. result = repr(box([arg("NaT")], dtype=object))
  269. assert result == expected
  270. def test_frame_datetime64_pre1900_repr(self):
  271. df = DataFrame({"year": date_range("1/1/1700", periods=50, freq="A-DEC")})
  272. # it works!
  273. repr(df)
  274. def test_frame_to_string_with_periodindex(self):
  275. index = PeriodIndex(["2011-1", "2011-2", "2011-3"], freq="M")
  276. frame = DataFrame(np.random.randn(3, 4), index=index)
  277. # it works!
  278. frame.to_string()
  279. def test_to_string_ea_na_in_multiindex(self):
  280. # GH#47986
  281. df = DataFrame(
  282. {"a": [1, 2]},
  283. index=MultiIndex.from_arrays([Series([NA, 1], dtype="Int64")]),
  284. )
  285. result = df.to_string()
  286. expected = """ a
  287. <NA> 1
  288. 1 2"""
  289. assert result == expected
  290. def test_datetime64tz_slice_non_truncate(self):
  291. # GH 30263
  292. df = DataFrame({"x": date_range("2019", periods=10, tz="UTC")})
  293. expected = repr(df)
  294. df = df.iloc[:, :5]
  295. result = repr(df)
  296. assert result == expected
  297. def test_masked_ea_with_formatter(self):
  298. # GH#39336
  299. df = DataFrame(
  300. {
  301. "a": Series([0.123456789, 1.123456789], dtype="Float64"),
  302. "b": Series([1, 2], dtype="Int64"),
  303. }
  304. )
  305. result = df.to_string(formatters=["{:.2f}".format, "{:.2f}".format])
  306. expected = """ a b
  307. 0 0.12 1.00
  308. 1 1.12 2.00"""
  309. assert result == expected