test_to_string.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. from datetime import datetime
  2. from io import StringIO
  3. from textwrap import dedent
  4. import numpy as np
  5. import pytest
  6. import pandas.util._test_decorators as td
  7. from pandas import (
  8. DataFrame,
  9. Series,
  10. option_context,
  11. to_datetime,
  12. )
  13. def test_repr_embedded_ndarray():
  14. arr = np.empty(10, dtype=[("err", object)])
  15. for i in range(len(arr)):
  16. arr["err"][i] = np.random.randn(i)
  17. df = DataFrame(arr)
  18. repr(df["err"])
  19. repr(df)
  20. df.to_string()
  21. def test_repr_tuples():
  22. buf = StringIO()
  23. df = DataFrame({"tups": list(zip(range(10), range(10)))})
  24. repr(df)
  25. df.to_string(col_space=10, buf=buf)
  26. def test_to_string_truncate():
  27. # GH 9784 - dont truncate when calling DataFrame.to_string
  28. df = DataFrame(
  29. [
  30. {
  31. "a": "foo",
  32. "b": "bar",
  33. "c": "let's make this a very VERY long line that is longer "
  34. "than the default 50 character limit",
  35. "d": 1,
  36. },
  37. {"a": "foo", "b": "bar", "c": "stuff", "d": 1},
  38. ]
  39. )
  40. df.set_index(["a", "b", "c"])
  41. assert df.to_string() == (
  42. " a b "
  43. " c d\n"
  44. "0 foo bar let's make this a very VERY long line t"
  45. "hat is longer than the default 50 character limit 1\n"
  46. "1 foo bar "
  47. " stuff 1"
  48. )
  49. with option_context("max_colwidth", 20):
  50. # the display option has no effect on the to_string method
  51. assert df.to_string() == (
  52. " a b "
  53. " c d\n"
  54. "0 foo bar let's make this a very VERY long line t"
  55. "hat is longer than the default 50 character limit 1\n"
  56. "1 foo bar "
  57. " stuff 1"
  58. )
  59. assert df.to_string(max_colwidth=20) == (
  60. " a b c d\n"
  61. "0 foo bar let's make this ... 1\n"
  62. "1 foo bar stuff 1"
  63. )
  64. @pytest.mark.parametrize(
  65. "input_array, expected",
  66. [
  67. ("a", "a"),
  68. (["a", "b"], "a\nb"),
  69. ([1, "a"], "1\na"),
  70. (1, "1"),
  71. ([0, -1], " 0\n-1"),
  72. (1.0, "1.0"),
  73. ([" a", " b"], " a\n b"),
  74. ([".1", "1"], ".1\n 1"),
  75. (["10", "-10"], " 10\n-10"),
  76. ],
  77. )
  78. def test_format_remove_leading_space_series(input_array, expected):
  79. # GH: 24980
  80. s = Series(input_array).to_string(index=False)
  81. assert s == expected
  82. @pytest.mark.parametrize(
  83. "input_array, expected",
  84. [
  85. ({"A": ["a"]}, "A\na"),
  86. ({"A": ["a", "b"], "B": ["c", "dd"]}, "A B\na c\nb dd"),
  87. ({"A": ["a", 1], "B": ["aa", 1]}, "A B\na aa\n1 1"),
  88. ],
  89. )
  90. def test_format_remove_leading_space_dataframe(input_array, expected):
  91. # GH: 24980
  92. df = DataFrame(input_array).to_string(index=False)
  93. assert df == expected
  94. @pytest.mark.parametrize(
  95. "max_cols, max_rows, expected",
  96. [
  97. (
  98. 10,
  99. None,
  100. " 0 1 2 3 4 ... 6 7 8 9 10\n"
  101. " 0 0 0 0 0 ... 0 0 0 0 0\n"
  102. " 0 0 0 0 0 ... 0 0 0 0 0\n"
  103. " 0 0 0 0 0 ... 0 0 0 0 0\n"
  104. " 0 0 0 0 0 ... 0 0 0 0 0",
  105. ),
  106. (
  107. None,
  108. 2,
  109. " 0 1 2 3 4 5 6 7 8 9 10\n"
  110. " 0 0 0 0 0 0 0 0 0 0 0\n"
  111. " .. .. .. .. .. .. .. .. .. .. ..\n"
  112. " 0 0 0 0 0 0 0 0 0 0 0",
  113. ),
  114. (
  115. 10,
  116. 2,
  117. " 0 1 2 3 4 ... 6 7 8 9 10\n"
  118. " 0 0 0 0 0 ... 0 0 0 0 0\n"
  119. " .. .. .. .. .. ... .. .. .. .. ..\n"
  120. " 0 0 0 0 0 ... 0 0 0 0 0",
  121. ),
  122. (
  123. 9,
  124. 2,
  125. " 0 1 2 3 ... 7 8 9 10\n"
  126. " 0 0 0 0 ... 0 0 0 0\n"
  127. " .. .. .. .. ... .. .. .. ..\n"
  128. " 0 0 0 0 ... 0 0 0 0",
  129. ),
  130. (
  131. 1,
  132. 1,
  133. " 0 ...\n 0 ...\n.. ...",
  134. ),
  135. ],
  136. )
  137. def test_truncation_no_index(max_cols, max_rows, expected):
  138. df = DataFrame([[0] * 11] * 4)
  139. assert df.to_string(index=False, max_cols=max_cols, max_rows=max_rows) == expected
  140. def test_to_string_unicode_columns(float_frame):
  141. df = DataFrame({"\u03c3": np.arange(10.0)})
  142. buf = StringIO()
  143. df.to_string(buf=buf)
  144. buf.getvalue()
  145. buf = StringIO()
  146. df.info(buf=buf)
  147. buf.getvalue()
  148. result = float_frame.to_string()
  149. assert isinstance(result, str)
  150. def test_to_string_utf8_columns():
  151. n = "\u05d0".encode()
  152. with option_context("display.max_rows", 1):
  153. df = DataFrame([1, 2], columns=[n])
  154. repr(df)
  155. def test_to_string_unicode_two():
  156. dm = DataFrame({"c/\u03c3": []})
  157. buf = StringIO()
  158. dm.to_string(buf)
  159. def test_to_string_unicode_three():
  160. dm = DataFrame(["\xc2"])
  161. buf = StringIO()
  162. dm.to_string(buf)
  163. def test_to_string_with_formatters():
  164. df = DataFrame(
  165. {
  166. "int": [1, 2, 3],
  167. "float": [1.0, 2.0, 3.0],
  168. "object": [(1, 2), True, False],
  169. },
  170. columns=["int", "float", "object"],
  171. )
  172. formatters = [
  173. ("int", lambda x: f"0x{x:x}"),
  174. ("float", lambda x: f"[{x: 4.1f}]"),
  175. ("object", lambda x: f"-{x!s}-"),
  176. ]
  177. result = df.to_string(formatters=dict(formatters))
  178. result2 = df.to_string(formatters=list(zip(*formatters))[1])
  179. assert result == (
  180. " int float object\n"
  181. "0 0x1 [ 1.0] -(1, 2)-\n"
  182. "1 0x2 [ 2.0] -True-\n"
  183. "2 0x3 [ 3.0] -False-"
  184. )
  185. assert result == result2
  186. def test_to_string_with_datetime64_monthformatter():
  187. months = [datetime(2016, 1, 1), datetime(2016, 2, 2)]
  188. x = DataFrame({"months": months})
  189. def format_func(x):
  190. return x.strftime("%Y-%m")
  191. result = x.to_string(formatters={"months": format_func})
  192. expected = dedent(
  193. """\
  194. months
  195. 0 2016-01
  196. 1 2016-02"""
  197. )
  198. assert result.strip() == expected
  199. def test_to_string_with_datetime64_hourformatter():
  200. x = DataFrame(
  201. {"hod": to_datetime(["10:10:10.100", "12:12:12.120"], format="%H:%M:%S.%f")}
  202. )
  203. def format_func(x):
  204. return x.strftime("%H:%M")
  205. result = x.to_string(formatters={"hod": format_func})
  206. expected = dedent(
  207. """\
  208. hod
  209. 0 10:10
  210. 1 12:12"""
  211. )
  212. assert result.strip() == expected
  213. def test_to_string_with_formatters_unicode():
  214. df = DataFrame({"c/\u03c3": [1, 2, 3]})
  215. result = df.to_string(formatters={"c/\u03c3": str})
  216. expected = dedent(
  217. """\
  218. c/\u03c3
  219. 0 1
  220. 1 2
  221. 2 3"""
  222. )
  223. assert result == expected
  224. def test_to_string_complex_number_trims_zeros():
  225. s = Series([1.000000 + 1.000000j, 1.0 + 1.0j, 1.05 + 1.0j])
  226. result = s.to_string()
  227. expected = dedent(
  228. """\
  229. 0 1.00+1.00j
  230. 1 1.00+1.00j
  231. 2 1.05+1.00j"""
  232. )
  233. assert result == expected
  234. def test_nullable_float_to_string(float_ea_dtype):
  235. # https://github.com/pandas-dev/pandas/issues/36775
  236. dtype = float_ea_dtype
  237. s = Series([0.0, 1.0, None], dtype=dtype)
  238. result = s.to_string()
  239. expected = dedent(
  240. """\
  241. 0 0.0
  242. 1 1.0
  243. 2 <NA>"""
  244. )
  245. assert result == expected
  246. def test_nullable_int_to_string(any_int_ea_dtype):
  247. # https://github.com/pandas-dev/pandas/issues/36775
  248. dtype = any_int_ea_dtype
  249. s = Series([0, 1, None], dtype=dtype)
  250. result = s.to_string()
  251. expected = dedent(
  252. """\
  253. 0 0
  254. 1 1
  255. 2 <NA>"""
  256. )
  257. assert result == expected
  258. @pytest.mark.parametrize("na_rep", ["NaN", "Ted"])
  259. def test_to_string_na_rep_and_float_format(na_rep):
  260. # GH 13828
  261. df = DataFrame([["A", 1.2225], ["A", None]], columns=["Group", "Data"])
  262. result = df.to_string(na_rep=na_rep, float_format="{:.2f}".format)
  263. expected = dedent(
  264. f"""\
  265. Group Data
  266. 0 A 1.22
  267. 1 A {na_rep}"""
  268. )
  269. assert result == expected
  270. @pytest.mark.parametrize(
  271. "data,expected",
  272. [
  273. (
  274. {"col1": [1, 2], "col2": [3, 4]},
  275. " col1 col2\n0 1 3\n1 2 4",
  276. ),
  277. (
  278. {"col1": ["Abc", 0.756], "col2": [np.nan, 4.5435]},
  279. " col1 col2\n0 Abc NaN\n1 0.756 4.5435",
  280. ),
  281. (
  282. {"col1": [np.nan, "a"], "col2": [0.009, 3.543], "col3": ["Abc", 23]},
  283. " col1 col2 col3\n0 NaN 0.009 Abc\n1 a 3.543 23",
  284. ),
  285. ],
  286. )
  287. def test_to_string_max_rows_zero(data, expected):
  288. # GH35394
  289. result = DataFrame(data=data).to_string(max_rows=0)
  290. assert result == expected
  291. @td.skip_if_no("pyarrow")
  292. def test_to_string_string_dtype():
  293. # GH#50099
  294. df = DataFrame({"x": ["foo", "bar", "baz"], "y": ["a", "b", "c"], "z": [1, 2, 3]})
  295. df = df.astype(
  296. {"x": "string[pyarrow]", "y": "string[python]", "z": "int64[pyarrow]"}
  297. )
  298. result = df.dtypes.to_string()
  299. expected = dedent(
  300. """\
  301. x string[pyarrow]
  302. y string[python]
  303. z int64[pyarrow]"""
  304. )
  305. assert result == expected