# test_repr.py (16 KB)
  1. from datetime import (
  2. datetime,
  3. timedelta,
  4. )
  5. import numpy as np
  6. import pytest
  7. import pandas as pd
  8. from pandas import (
  9. Categorical,
  10. DataFrame,
  11. Index,
  12. Series,
  13. date_range,
  14. option_context,
  15. period_range,
  16. timedelta_range,
  17. )
  18. import pandas._testing as tm
  19. class TestSeriesRepr:
  20. def test_multilevel_name_print(self, lexsorted_two_level_string_multiindex):
  21. index = lexsorted_two_level_string_multiindex
  22. ser = Series(range(len(index)), index=index, name="sth")
  23. expected = [
  24. "first second",
  25. "foo one 0",
  26. " two 1",
  27. " three 2",
  28. "bar one 3",
  29. " two 4",
  30. "baz two 5",
  31. " three 6",
  32. "qux one 7",
  33. " two 8",
  34. " three 9",
  35. "Name: sth, dtype: int64",
  36. ]
  37. expected = "\n".join(expected)
  38. assert repr(ser) == expected
  39. def test_small_name_printing(self):
  40. # Test small Series.
  41. s = Series([0, 1, 2])
  42. s.name = "test"
  43. assert "Name: test" in repr(s)
  44. s.name = None
  45. assert "Name:" not in repr(s)
  46. def test_big_name_printing(self):
  47. # Test big Series (diff code path).
  48. s = Series(range(1000))
  49. s.name = "test"
  50. assert "Name: test" in repr(s)
  51. s.name = None
  52. assert "Name:" not in repr(s)
  53. def test_empty_name_printing(self):
  54. s = Series(index=date_range("20010101", "20020101"), name="test", dtype=object)
  55. assert "Name: test" in repr(s)
  56. @pytest.mark.parametrize("args", [(), (0, -1)])
  57. def test_float_range(self, args):
  58. str(Series(np.random.randn(1000), index=np.arange(1000, *args)))
  59. def test_empty_object(self):
  60. # empty
  61. str(Series(dtype=object))
  62. def test_string(self, string_series):
  63. str(string_series)
  64. str(string_series.astype(int))
  65. # with NaNs
  66. string_series[5:7] = np.NaN
  67. str(string_series)
  68. def test_object(self, object_series):
  69. str(object_series)
  70. def test_datetime(self, datetime_series):
  71. str(datetime_series)
  72. # with Nones
  73. ots = datetime_series.astype("O")
  74. ots[::2] = None
  75. repr(ots)
  76. @pytest.mark.parametrize(
  77. "name",
  78. [
  79. "",
  80. 1,
  81. 1.2,
  82. "foo",
  83. "\u03B1\u03B2\u03B3",
  84. "loooooooooooooooooooooooooooooooooooooooooooooooooooong",
  85. ("foo", "bar", "baz"),
  86. (1, 2),
  87. ("foo", 1, 2.3),
  88. ("\u03B1", "\u03B2", "\u03B3"),
  89. ("\u03B1", "bar"),
  90. ],
  91. )
  92. def test_various_names(self, name, string_series):
  93. # various names
  94. string_series.name = name
  95. repr(string_series)
  96. def test_tuple_name(self):
  97. biggie = Series(
  98. np.random.randn(1000), index=np.arange(1000), name=("foo", "bar", "baz")
  99. )
  100. repr(biggie)
  101. @pytest.mark.parametrize("arg", [100, 1001])
  102. def test_tidy_repr_name_0(self, arg):
  103. # tidy repr
  104. ser = Series(np.random.randn(arg), name=0)
  105. rep_str = repr(ser)
  106. assert "Name: 0" in rep_str
  107. def test_newline(self):
  108. ser = Series(["a\n\r\tb"], name="a\n\r\td", index=["a\n\r\tf"])
  109. assert "\t" not in repr(ser)
  110. assert "\r" not in repr(ser)
  111. assert "a\n" not in repr(ser)
  112. @pytest.mark.parametrize(
  113. "name, expected",
  114. [
  115. ["foo", "Series([], Name: foo, dtype: int64)"],
  116. [None, "Series([], dtype: int64)"],
  117. ],
  118. )
  119. def test_empty_int64(self, name, expected):
  120. # with empty series (#4651)
  121. s = Series([], dtype=np.int64, name=name)
  122. assert repr(s) == expected
  123. def test_tidy_repr(self):
  124. a = Series(["\u05d0"] * 1000)
  125. a.name = "title1"
  126. repr(a) # should not raise exception
  127. def test_repr_bool_fails(self, capsys):
  128. s = Series([DataFrame(np.random.randn(2, 2)) for i in range(5)])
  129. # It works (with no Cython exception barf)!
  130. repr(s)
  131. captured = capsys.readouterr()
  132. assert captured.err == ""
  133. def test_repr_name_iterable_indexable(self):
  134. s = Series([1, 2, 3], name=np.int64(3))
  135. # it works!
  136. repr(s)
  137. s.name = ("\u05d0",) * 2
  138. repr(s)
  139. def test_repr_should_return_str(self):
  140. # https://docs.python.org/3/reference/datamodel.html#object.__repr__
  141. # ...The return value must be a string object.
  142. # (str on py2.x, str (unicode) on py3)
  143. data = [8, 5, 3, 5]
  144. index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"]
  145. df = Series(data, index=index1)
  146. assert type(df.__repr__() == str) # both py2 / 3
  147. def test_repr_max_rows(self):
  148. # GH 6863
  149. with option_context("display.max_rows", None):
  150. str(Series(range(1001))) # should not raise exception
  151. def test_unicode_string_with_unicode(self):
  152. df = Series(["\u05d0"], name="\u05d1")
  153. str(df)
  154. def test_str_to_bytes_raises(self):
  155. # GH 26447
  156. df = Series(["abc"], name="abc")
  157. msg = "^'str' object cannot be interpreted as an integer$"
  158. with pytest.raises(TypeError, match=msg):
  159. bytes(df)
  160. def test_timeseries_repr_object_dtype(self):
  161. index = Index(
  162. [datetime(2000, 1, 1) + timedelta(i) for i in range(1000)], dtype=object
  163. )
  164. ts = Series(np.random.randn(len(index)), index)
  165. repr(ts)
  166. ts = tm.makeTimeSeries(1000)
  167. assert repr(ts).splitlines()[-1].startswith("Freq:")
  168. ts2 = ts.iloc[np.random.randint(0, len(ts) - 1, 400)]
  169. repr(ts2).splitlines()[-1]
  170. def test_latex_repr(self):
  171. pytest.importorskip("jinja2") # uses Styler implementation
  172. result = r"""\begin{tabular}{ll}
  173. \toprule
  174. & 0 \\
  175. \midrule
  176. 0 & $\alpha$ \\
  177. 1 & b \\
  178. 2 & c \\
  179. \bottomrule
  180. \end{tabular}
  181. """
  182. with option_context(
  183. "styler.format.escape", None, "styler.render.repr", "latex"
  184. ):
  185. s = Series([r"$\alpha$", "b", "c"])
  186. assert result == s._repr_latex_()
  187. assert s._repr_latex_() is None
  188. def test_index_repr_in_frame_with_nan(self):
  189. # see gh-25061
  190. i = Index([1, np.nan])
  191. s = Series([1, 2], index=i)
  192. exp = """1.0 1\nNaN 2\ndtype: int64"""
  193. assert repr(s) == exp
  194. def test_format_pre_1900_dates(self):
  195. rng = date_range("1/1/1850", "1/1/1950", freq="A-DEC")
  196. rng.format()
  197. ts = Series(1, index=rng)
  198. repr(ts)
  199. def test_series_repr_nat(self):
  200. series = Series([0, 1000, 2000, pd.NaT._value], dtype="M8[ns]")
  201. result = repr(series)
  202. expected = (
  203. "0 1970-01-01 00:00:00.000000\n"
  204. "1 1970-01-01 00:00:00.000001\n"
  205. "2 1970-01-01 00:00:00.000002\n"
  206. "3 NaT\n"
  207. "dtype: datetime64[ns]"
  208. )
  209. assert result == expected
  210. def test_float_repr(self):
  211. # GH#35603
  212. # check float format when cast to object
  213. ser = Series([1.0]).astype(object)
  214. expected = "0 1.0\ndtype: object"
  215. assert repr(ser) == expected
  216. def test_different_null_objects(self):
  217. # GH#45263
  218. ser = Series([1, 2, 3, 4], [True, None, np.nan, pd.NaT])
  219. result = repr(ser)
  220. expected = "True 1\nNone 2\nNaN 3\nNaT 4\ndtype: int64"
  221. assert result == expected
  222. class TestCategoricalRepr:
  223. def test_categorical_repr_unicode(self):
  224. # see gh-21002
  225. class County:
  226. name = "San Sebastián"
  227. state = "PR"
  228. def __repr__(self) -> str:
  229. return self.name + ", " + self.state
  230. cat = Categorical([County() for _ in range(61)])
  231. idx = Index(cat)
  232. ser = idx.to_series()
  233. repr(ser)
  234. str(ser)
  235. def test_categorical_repr(self):
  236. a = Series(Categorical([1, 2, 3, 4]))
  237. exp = (
  238. "0 1\n1 2\n2 3\n3 4\n"
  239. + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]"
  240. )
  241. assert exp == a.__str__()
  242. a = Series(Categorical(["a", "b"] * 25))
  243. exp = (
  244. "0 a\n1 b\n"
  245. + " ..\n"
  246. + "48 a\n49 b\n"
  247. + "Length: 50, dtype: category\nCategories (2, object): ['a', 'b']"
  248. )
  249. with option_context("display.max_rows", 5):
  250. assert exp == repr(a)
  251. levs = list("abcdefghijklmnopqrstuvwxyz")
  252. a = Series(Categorical(["a", "b"], categories=levs, ordered=True))
  253. exp = (
  254. "0 a\n1 b\n" + "dtype: category\n"
  255. "Categories (26, object): ['a' < 'b' < 'c' < 'd' ... 'w' < 'x' < 'y' < 'z']"
  256. )
  257. assert exp == a.__str__()
  258. def test_categorical_series_repr(self):
  259. s = Series(Categorical([1, 2, 3]))
  260. exp = """0 1
  261. 1 2
  262. 2 3
  263. dtype: category
  264. Categories (3, int64): [1, 2, 3]"""
  265. assert repr(s) == exp
  266. s = Series(Categorical(np.arange(10)))
  267. exp = f"""0 0
  268. 1 1
  269. 2 2
  270. 3 3
  271. 4 4
  272. 5 5
  273. 6 6
  274. 7 7
  275. 8 8
  276. 9 9
  277. dtype: category
  278. Categories (10, {np.int_().dtype}): [0, 1, 2, 3, ..., 6, 7, 8, 9]"""
  279. assert repr(s) == exp
  280. def test_categorical_series_repr_ordered(self):
  281. s = Series(Categorical([1, 2, 3], ordered=True))
  282. exp = """0 1
  283. 1 2
  284. 2 3
  285. dtype: category
  286. Categories (3, int64): [1 < 2 < 3]"""
  287. assert repr(s) == exp
  288. s = Series(Categorical(np.arange(10), ordered=True))
  289. exp = f"""0 0
  290. 1 1
  291. 2 2
  292. 3 3
  293. 4 4
  294. 5 5
  295. 6 6
  296. 7 7
  297. 8 8
  298. 9 9
  299. dtype: category
  300. Categories (10, {np.int_().dtype}): [0 < 1 < 2 < 3 ... 6 < 7 < 8 < 9]"""
  301. assert repr(s) == exp
  302. def test_categorical_series_repr_datetime(self):
  303. idx = date_range("2011-01-01 09:00", freq="H", periods=5)
  304. s = Series(Categorical(idx))
  305. exp = """0 2011-01-01 09:00:00
  306. 1 2011-01-01 10:00:00
  307. 2 2011-01-01 11:00:00
  308. 3 2011-01-01 12:00:00
  309. 4 2011-01-01 13:00:00
  310. dtype: category
  311. Categories (5, datetime64[ns]): [2011-01-01 09:00:00, 2011-01-01 10:00:00, 2011-01-01 11:00:00,
  312. 2011-01-01 12:00:00, 2011-01-01 13:00:00]""" # noqa:E501
  313. assert repr(s) == exp
  314. idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
  315. s = Series(Categorical(idx))
  316. exp = """0 2011-01-01 09:00:00-05:00
  317. 1 2011-01-01 10:00:00-05:00
  318. 2 2011-01-01 11:00:00-05:00
  319. 3 2011-01-01 12:00:00-05:00
  320. 4 2011-01-01 13:00:00-05:00
  321. dtype: category
  322. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00, 2011-01-01 10:00:00-05:00,
  323. 2011-01-01 11:00:00-05:00, 2011-01-01 12:00:00-05:00,
  324. 2011-01-01 13:00:00-05:00]""" # noqa:E501
  325. assert repr(s) == exp
  326. def test_categorical_series_repr_datetime_ordered(self):
  327. idx = date_range("2011-01-01 09:00", freq="H", periods=5)
  328. s = Series(Categorical(idx, ordered=True))
  329. exp = """0 2011-01-01 09:00:00
  330. 1 2011-01-01 10:00:00
  331. 2 2011-01-01 11:00:00
  332. 3 2011-01-01 12:00:00
  333. 4 2011-01-01 13:00:00
  334. dtype: category
  335. Categories (5, datetime64[ns]): [2011-01-01 09:00:00 < 2011-01-01 10:00:00 < 2011-01-01 11:00:00 <
  336. 2011-01-01 12:00:00 < 2011-01-01 13:00:00]""" # noqa:E501
  337. assert repr(s) == exp
  338. idx = date_range("2011-01-01 09:00", freq="H", periods=5, tz="US/Eastern")
  339. s = Series(Categorical(idx, ordered=True))
  340. exp = """0 2011-01-01 09:00:00-05:00
  341. 1 2011-01-01 10:00:00-05:00
  342. 2 2011-01-01 11:00:00-05:00
  343. 3 2011-01-01 12:00:00-05:00
  344. 4 2011-01-01 13:00:00-05:00
  345. dtype: category
  346. Categories (5, datetime64[ns, US/Eastern]): [2011-01-01 09:00:00-05:00 < 2011-01-01 10:00:00-05:00 <
  347. 2011-01-01 11:00:00-05:00 < 2011-01-01 12:00:00-05:00 <
  348. 2011-01-01 13:00:00-05:00]""" # noqa:E501
  349. assert repr(s) == exp
  350. def test_categorical_series_repr_period(self):
  351. idx = period_range("2011-01-01 09:00", freq="H", periods=5)
  352. s = Series(Categorical(idx))
  353. exp = """0 2011-01-01 09:00
  354. 1 2011-01-01 10:00
  355. 2 2011-01-01 11:00
  356. 3 2011-01-01 12:00
  357. 4 2011-01-01 13:00
  358. dtype: category
  359. Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
  360. 2011-01-01 13:00]""" # noqa:E501
  361. assert repr(s) == exp
  362. idx = period_range("2011-01", freq="M", periods=5)
  363. s = Series(Categorical(idx))
  364. exp = """0 2011-01
  365. 1 2011-02
  366. 2 2011-03
  367. 3 2011-04
  368. 4 2011-05
  369. dtype: category
  370. Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
  371. assert repr(s) == exp
  372. def test_categorical_series_repr_period_ordered(self):
  373. idx = period_range("2011-01-01 09:00", freq="H", periods=5)
  374. s = Series(Categorical(idx, ordered=True))
  375. exp = """0 2011-01-01 09:00
  376. 1 2011-01-01 10:00
  377. 2 2011-01-01 11:00
  378. 3 2011-01-01 12:00
  379. 4 2011-01-01 13:00
  380. dtype: category
  381. Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
  382. 2011-01-01 13:00]""" # noqa:E501
  383. assert repr(s) == exp
  384. idx = period_range("2011-01", freq="M", periods=5)
  385. s = Series(Categorical(idx, ordered=True))
  386. exp = """0 2011-01
  387. 1 2011-02
  388. 2 2011-03
  389. 3 2011-04
  390. 4 2011-05
  391. dtype: category
  392. Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
  393. assert repr(s) == exp
  394. def test_categorical_series_repr_timedelta(self):
  395. idx = timedelta_range("1 days", periods=5)
  396. s = Series(Categorical(idx))
  397. exp = """0 1 days
  398. 1 2 days
  399. 2 3 days
  400. 3 4 days
  401. 4 5 days
  402. dtype: category
  403. Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]"""
  404. assert repr(s) == exp
  405. idx = timedelta_range("1 hours", periods=10)
  406. s = Series(Categorical(idx))
  407. exp = """0 0 days 01:00:00
  408. 1 1 days 01:00:00
  409. 2 2 days 01:00:00
  410. 3 3 days 01:00:00
  411. 4 4 days 01:00:00
  412. 5 5 days 01:00:00
  413. 6 6 days 01:00:00
  414. 7 7 days 01:00:00
  415. 8 8 days 01:00:00
  416. 9 9 days 01:00:00
  417. dtype: category
  418. Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00,
  419. 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00,
  420. 8 days 01:00:00, 9 days 01:00:00]""" # noqa:E501
  421. assert repr(s) == exp
  422. def test_categorical_series_repr_timedelta_ordered(self):
  423. idx = timedelta_range("1 days", periods=5)
  424. s = Series(Categorical(idx, ordered=True))
  425. exp = """0 1 days
  426. 1 2 days
  427. 2 3 days
  428. 3 4 days
  429. 4 5 days
  430. dtype: category
  431. Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]"""
  432. assert repr(s) == exp
  433. idx = timedelta_range("1 hours", periods=10)
  434. s = Series(Categorical(idx, ordered=True))
  435. exp = """0 0 days 01:00:00
  436. 1 1 days 01:00:00
  437. 2 2 days 01:00:00
  438. 3 3 days 01:00:00
  439. 4 4 days 01:00:00
  440. 5 5 days 01:00:00
  441. 6 6 days 01:00:00
  442. 7 7 days 01:00:00
  443. 8 8 days 01:00:00
  444. 9 9 days 01:00:00
  445. dtype: category
  446. Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 <
  447. 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 <
  448. 8 days 01:00:00 < 9 days 01:00:00]""" # noqa:E501
  449. assert repr(s) == exp