test_api.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295
  1. import inspect
  2. import pydoc
  3. import numpy as np
  4. import pytest
  5. from pandas.util._test_decorators import skip_if_no
  6. import pandas as pd
  7. from pandas import (
  8. DataFrame,
  9. Index,
  10. Series,
  11. date_range,
  12. )
  13. import pandas._testing as tm
  class TestSeriesMisc:
      """Miscellaneous checks of the public ``Series`` API.

      Covers tab completion (``dir``), hashing, axis aliases, ndarray
      compatibility, ``empty``/``size``/``attrs``, attribute access and the
      ``numeric_only`` keyword.
      """

      def test_tab_completion(self):
          """A Series of strings lists ``str`` in ``dir`` but not ``dt``/``cat``."""
          # GH 9910
          ser = Series(list("abcd"))
          names = dir(ser)
          assert "str" in names
          assert "dt" not in names
          assert "cat" not in names

      def test_tab_completion_dt(self):
          """A datetime Series lists ``dt`` in ``dir`` but not ``str``/``cat``."""
          ser = Series(date_range("1/1/2015", periods=5))
          names = dir(ser)
          assert "dt" in names
          assert "str" not in names
          assert "cat" not in names

      def test_tab_completion_cat(self):
          """A categorical of strings lists both ``cat`` and ``str`` in ``dir``."""
          # The accessor matching the dtype of the categories is listed too;
          # here the categories are strings, so ``str`` is present.
          ser = Series(list("abbcd"), dtype="category")
          names = dir(ser)
          assert "cat" in names
          assert "str" in names  # as it is a string categorical
          assert "dt" not in names

      def test_tab_completion_cat_str(self):
          """A categorical of datetimes lists both ``cat`` and ``dt`` in ``dir``."""
          ser = Series(date_range("1/1/2015", periods=5)).astype("category")
          names = dir(ser)
          assert "cat" in names
          assert "str" not in names
          assert "dt" in names  # as it is a datetime categorical

      def test_tab_completion_with_categorical(self):
          """The public names on ``Series.cat`` are exactly the expected set."""
          ok_for_cat = [
              "categories",
              "codes",
              "ordered",
              "set_categories",
              "add_categories",
              "remove_categories",
              "rename_categories",
              "reorder_categories",
              "remove_unused_categories",
              "as_ordered",
              "as_unordered",
          ]

          ser = Series(list("aabbcde")).astype("category")
          # Only public names (no leading underscore) are compared.
          results = sorted({r for r in ser.cat.__dir__() if not r.startswith("_")})
          tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

      @pytest.mark.parametrize(
          "index",
          [
              tm.makeStringIndex(10),
              tm.makeCategoricalIndex(10),
              Index(["foo", "bar", "baz"] * 2),
              tm.makeDateIndex(10),
              tm.makePeriodIndex(10),
              tm.makeTimedeltaIndex(10),
              tm.makeIntIndex(10),
              tm.makeUIntIndex(10),
              tm.makeFloatIndex(10),
              Index([True, False]),
              Index([f"a{i}" for i in range(101)]),
              pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
              pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
          ],
      )
      def test_index_tab_completion(self, index):
          """String labels that are valid identifiers show up in ``dir``.

          Only the first 100 unique first-level labels are expected to be
          listed; any label past that must be absent.
          """
          ser = Series(index=index, dtype=object)
          dir_s = dir(ser)
          for i, x in enumerate(ser.index.unique(level=0)):
              if i < 100:
                  # Non-string labels and non-identifier strings are exempt.
                  assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
              else:
                  assert x not in dir_s

      @pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])])
      def test_not_hashable(self, ser):
          """``hash`` on a Series raises ``TypeError``."""
          msg = "unhashable type: 'Series'"
          with pytest.raises(TypeError, match=msg):
              hash(ser)

      def test_contains(self, datetime_series):
          """Every index label of the fixture Series is contained in the Series."""
          tm.assert_contains_all(datetime_series.index, datetime_series)

      def test_axis_alias(self):
          """``"rows"`` is accepted as an alias of the ``"index"`` axis."""
          ser = Series([1, 2, np.nan])
          tm.assert_series_equal(ser.dropna(axis="rows"), ser.dropna(axis="index"))
          assert ser.dropna().sum("rows") == 3
          assert ser._get_axis_number("rows") == 0
          assert ser._get_axis_name("rows") == "index"

      def test_class_axis(self):
          """``pydoc.getdoc(Series.index)`` returns a non-empty docstring."""
          # https://github.com/pandas-dev/pandas/issues/18147
          # no exception and no empty docstring
          assert pydoc.getdoc(Series.index)

      def test_ndarray_compat(self):
          """Indexing each column by its ``idxmax`` label matches ``DataFrame.max``."""
          # test numpy compat with Series as sub-class of NDFrame
          tsdf = DataFrame(
              np.random.randn(1000, 3),
              columns=["A", "B", "C"],
              index=date_range("1/1/2000", periods=1000),
          )

          def f(x):
              return x[x.idxmax()]

          result = tsdf.apply(f)
          expected = tsdf.max()
          tm.assert_series_equal(result, expected)

      def test_ndarray_compat_like_func(self):
          """``np.ones_like`` on a Series yields float64 ones of the same length."""
          # using an ndarray like function
          ser = Series(np.random.randn(10))
          result = Series(np.ones_like(ser))
          expected = Series(1, index=range(10), dtype="float64")
          tm.assert_series_equal(result, expected)

      def test_ndarray_compat_ravel(self):
          """``Series.ravel`` agrees with ``ravel`` on the underlying values."""
          ser = Series(np.random.randn(10))
          tm.assert_almost_equal(ser.ravel(order="F"), ser.values.ravel(order="F"))

      def test_empty_method(self):
          """A Series built with no data reports ``empty``."""
          s_empty = Series(dtype=object)
          assert s_empty.empty

      @pytest.mark.parametrize("dtype", ["int64", object])
      def test_empty_method_full_series(self, dtype):
          """A Series with one index label and no data is not ``empty``."""
          full_series = Series(index=[1], dtype=dtype)
          assert not full_series.empty

      @pytest.mark.parametrize("dtype", [None, "Int64"])
      def test_integer_series_size(self, dtype):
          """``size`` equals the number of elements for both dtypes."""
          # GH 25580
          ser = Series(range(9), dtype=dtype)
          assert ser.size == 9

      def test_attrs(self):
          """``attrs`` starts empty and is carried over to the result of ``+``."""
          ser = Series([0, 1], name="abc")
          assert ser.attrs == {}
          ser.attrs["version"] = 1
          result = ser + 1
          assert result.attrs == {"version": 1}

      @skip_if_no("jinja2")
      def test_inspect_getmembers(self):
          """``inspect.getmembers`` on a Series emits no warning."""
          # GH38782
          ser = Series(dtype=object)
          with tm.assert_produces_warning(None, check_stacklevel=False):
              inspect.getmembers(ser)

      def test_unknown_attribute(self):
          """Accessing an attribute that does not exist raises ``AttributeError``."""
          # GH#9680
          tdi = pd.timedelta_range(start=0, periods=10, freq="1s")
          ser = Series(np.random.normal(size=10), index=tdi)
          assert "foo" not in ser.__dict__
          msg = "'Series' object has no attribute 'foo'"
          with pytest.raises(AttributeError, match=msg):
              ser.foo

      @pytest.mark.parametrize("op", ["year", "day", "second", "weekday"])
      def test_datetime_series_no_datelike_attrs(self, op, datetime_series):
          """Datetime-like field names are not attributes of the fixture Series."""
          # GH#7206
          msg = f"'Series' object has no attribute '{op}'"
          with pytest.raises(AttributeError, match=msg):
              getattr(datetime_series, op)

      def test_series_datetimelike_attribute_access(self):
          """Index labels named like datetime fields are reachable as attributes."""
          # attribute access should still work!
          ser = Series({"year": 2000, "month": 1, "day": 10})
          assert ser.year == 2000
          assert ser.month == 1
          assert ser.day == 10

      def test_series_datetimelike_attribute_access_invalid(self):
          """A datetime field name that is not an index label still raises."""
          ser = Series({"year": 2000, "month": 1, "day": 10})
          msg = "'Series' object has no attribute 'weekday'"
          with pytest.raises(AttributeError, match=msg):
              ser.weekday

      @pytest.mark.parametrize(
          "kernel, has_numeric_only",
          [
              ("skew", True),
              ("var", True),
              ("all", False),
              ("prod", True),
              ("any", False),
              ("idxmin", False),
              ("quantile", False),
              ("idxmax", False),
              ("min", True),
              ("sem", True),
              ("mean", True),
              ("nunique", False),
              ("max", True),
              ("sum", True),
              ("count", False),
              ("median", True),
              ("std", True),
              ("backfill", False),
              ("rank", True),
              ("pct_change", False),
              ("cummax", False),
              ("shift", False),
              ("diff", False),
              ("cumsum", False),
              ("cummin", False),
              ("cumprod", False),
              ("fillna", False),
              ("ffill", False),
              ("pad", False),
              ("bfill", False),
              ("sample", False),
              ("tail", False),
              ("take", False),
              ("head", False),
              ("cov", False),
              ("corr", False),
          ],
      )
      @pytest.mark.parametrize("dtype", [bool, int, float, object])
      def test_numeric_only(self, kernel, has_numeric_only, dtype):
          """Check how each kernel reacts to ``numeric_only=True``.

          * Kernels flagged ``has_numeric_only=False`` must raise ``TypeError``.
          * Kernels that accept it must raise ``TypeError`` for object dtype.
          * Otherwise the result must equal the ``numeric_only=False`` result.
          """
          # GH#47500
          ser = Series([0, 1, 1], dtype=dtype)

          # Positional arguments for the kernels that cannot be called bare;
          # every other kernel is invoked without positional arguments.
          positional_args = {
              "corr": (ser,),
              "cov": (ser,),
              "fillna": (True,),
              "take": ([0],),
              "quantile": (0.5,),
          }
          args = positional_args.get(kernel, ())
          method = getattr(ser, kernel)

          if not has_numeric_only:
              msg = (
                  "(got an unexpected keyword argument 'numeric_only'"
                  "|too many arguments passed in)"
              )
              with pytest.raises(TypeError, match=msg):
                  method(*args, numeric_only=True)
          elif dtype is object:
              msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
              with pytest.raises(TypeError, match=msg):
                  method(*args, numeric_only=True)
          else:
              result = method(*args, numeric_only=True)
              expected = method(*args, numeric_only=False)
              if isinstance(expected, Series):
                  # transformer
                  tm.assert_series_equal(result, expected)
              else:
                  # reducer
                  assert result == expected
  @pytest.mark.parametrize("converter", [int, float, complex])
  def test_float_int_deprecated(converter):
      """Converting a one-element Series with ``converter`` warns.

      The conversion must emit a ``FutureWarning`` and still produce the
      same value as converting the bare scalar.
      """
      # GH 51101
      with tm.assert_produces_warning(FutureWarning):
          from_series = converter(Series([1]))
          from_scalar = converter(1)
          assert from_series == from_scalar