test_series_info.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. from io import StringIO
  2. from string import ascii_uppercase as uppercase
  3. import textwrap
  4. import numpy as np
  5. import pytest
  6. from pandas.compat import PYPY
  7. from pandas import (
  8. CategoricalIndex,
  9. MultiIndex,
  10. Series,
  11. date_range,
  12. )
  13. def test_info_categorical_column_just_works():
  14. n = 2500
  15. data = np.array(list("abcdefghij")).take(np.random.randint(0, 10, size=n))
  16. s = Series(data).astype("category")
  17. s.isna()
  18. buf = StringIO()
  19. s.info(buf=buf)
  20. s2 = s[s == "d"]
  21. buf = StringIO()
  22. s2.info(buf=buf)
  23. def test_info_categorical():
  24. # GH14298
  25. idx = CategoricalIndex(["a", "b"])
  26. s = Series(np.zeros(2), index=idx)
  27. buf = StringIO()
  28. s.info(buf=buf)
  29. @pytest.mark.parametrize("verbose", [True, False])
  30. def test_info_series(lexsorted_two_level_string_multiindex, verbose):
  31. index = lexsorted_two_level_string_multiindex
  32. ser = Series(range(len(index)), index=index, name="sth")
  33. buf = StringIO()
  34. ser.info(verbose=verbose, buf=buf)
  35. result = buf.getvalue()
  36. expected = textwrap.dedent(
  37. """\
  38. <class 'pandas.core.series.Series'>
  39. MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
  40. """
  41. )
  42. if verbose:
  43. expected += textwrap.dedent(
  44. """\
  45. Series name: sth
  46. Non-Null Count Dtype
  47. -------------- -----
  48. 10 non-null int64
  49. """
  50. )
  51. expected += textwrap.dedent(
  52. f"""\
  53. dtypes: int64(1)
  54. memory usage: {ser.memory_usage()}.0+ bytes
  55. """
  56. )
  57. assert result == expected
  58. def test_info_memory():
  59. s = Series([1, 2], dtype="i8")
  60. buf = StringIO()
  61. s.info(buf=buf)
  62. result = buf.getvalue()
  63. memory_bytes = float(s.memory_usage())
  64. expected = textwrap.dedent(
  65. f"""\
  66. <class 'pandas.core.series.Series'>
  67. RangeIndex: 2 entries, 0 to 1
  68. Series name: None
  69. Non-Null Count Dtype
  70. -------------- -----
  71. 2 non-null int64
  72. dtypes: int64(1)
  73. memory usage: {memory_bytes} bytes
  74. """
  75. )
  76. assert result == expected
  77. def test_info_wide():
  78. s = Series(np.random.randn(101))
  79. msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
  80. with pytest.raises(ValueError, match=msg):
  81. s.info(max_cols=1)
  82. def test_info_shows_dtypes():
  83. dtypes = [
  84. "int64",
  85. "float64",
  86. "datetime64[ns]",
  87. "timedelta64[ns]",
  88. "complex128",
  89. "object",
  90. "bool",
  91. ]
  92. n = 10
  93. for dtype in dtypes:
  94. s = Series(np.random.randint(2, size=n).astype(dtype))
  95. buf = StringIO()
  96. s.info(buf=buf)
  97. res = buf.getvalue()
  98. name = f"{n:d} non-null {dtype}"
  99. assert name in res
  100. @pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
  101. def test_info_memory_usage_deep_not_pypy():
  102. s_with_object_index = Series({"a": [1]}, index=["foo"])
  103. assert s_with_object_index.memory_usage(
  104. index=True, deep=True
  105. ) > s_with_object_index.memory_usage(index=True)
  106. s_object = Series({"a": ["a"]})
  107. assert s_object.memory_usage(deep=True) > s_object.memory_usage()
  108. @pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
  109. def test_info_memory_usage_deep_pypy():
  110. s_with_object_index = Series({"a": [1]}, index=["foo"])
  111. assert s_with_object_index.memory_usage(
  112. index=True, deep=True
  113. ) == s_with_object_index.memory_usage(index=True)
  114. s_object = Series({"a": ["a"]})
  115. assert s_object.memory_usage(deep=True) == s_object.memory_usage()
  116. @pytest.mark.parametrize(
  117. "series, plus",
  118. [
  119. (Series(1, index=[1, 2, 3]), False),
  120. (Series(1, index=list("ABC")), True),
  121. (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
  122. (
  123. Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
  124. True,
  125. ),
  126. ],
  127. )
  128. def test_info_memory_usage_qualified(series, plus):
  129. buf = StringIO()
  130. series.info(buf=buf)
  131. if plus:
  132. assert "+" in buf.getvalue()
  133. else:
  134. assert "+" not in buf.getvalue()
  135. def test_info_memory_usage_bug_on_multiindex():
  136. # GH 14308
  137. # memory usage introspection should not materialize .values
  138. N = 100
  139. M = len(uppercase)
  140. index = MultiIndex.from_product(
  141. [list(uppercase), date_range("20160101", periods=N)],
  142. names=["id", "date"],
  143. )
  144. s = Series(np.random.randn(N * M), index=index)
  145. unstacked = s.unstack("id")
  146. assert s.values.nbytes == unstacked.values.nbytes
  147. assert s.memory_usage(deep=True) > unstacked.memory_usage(deep=True).sum()
  148. # high upper bound
  149. diff = unstacked.memory_usage(deep=True).sum() - s.memory_usage(deep=True)
  150. assert diff < 2000