# test_api.py (12 KB)
from copy import deepcopy
import inspect
import pydoc

import numpy as np
import pytest

from pandas._config.config import option_context
from pandas.util._test_decorators import (
    async_mark,
    skip_if_no,
)

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


  19. class TestDataFrameMisc:
  20. def test_getitem_pop_assign_name(self, float_frame):
  21. s = float_frame["A"]
  22. assert s.name == "A"
  23. s = float_frame.pop("A")
  24. assert s.name == "A"
  25. s = float_frame.loc[:, "B"]
  26. assert s.name == "B"
  27. s2 = s.loc[:]
  28. assert s2.name == "B"
  29. def test_get_axis(self, float_frame):
  30. f = float_frame
  31. assert f._get_axis_number(0) == 0
  32. assert f._get_axis_number(1) == 1
  33. assert f._get_axis_number("index") == 0
  34. assert f._get_axis_number("rows") == 0
  35. assert f._get_axis_number("columns") == 1
  36. assert f._get_axis_name(0) == "index"
  37. assert f._get_axis_name(1) == "columns"
  38. assert f._get_axis_name("index") == "index"
  39. assert f._get_axis_name("rows") == "index"
  40. assert f._get_axis_name("columns") == "columns"
  41. assert f._get_axis(0) is f.index
  42. assert f._get_axis(1) is f.columns
  43. with pytest.raises(ValueError, match="No axis named"):
  44. f._get_axis_number(2)
  45. with pytest.raises(ValueError, match="No axis.*foo"):
  46. f._get_axis_name("foo")
  47. with pytest.raises(ValueError, match="No axis.*None"):
  48. f._get_axis_name(None)
  49. with pytest.raises(ValueError, match="No axis named"):
  50. f._get_axis_number(None)
  51. def test_column_contains_raises(self, float_frame):
  52. with pytest.raises(TypeError, match="unhashable type: 'Index'"):
  53. float_frame.columns in float_frame
  54. def test_tab_completion(self):
  55. # DataFrame whose columns are identifiers shall have them in __dir__.
  56. df = DataFrame([list("abcd"), list("efgh")], columns=list("ABCD"))
  57. for key in list("ABCD"):
  58. assert key in dir(df)
  59. assert isinstance(df.__getitem__("A"), Series)
  60. # DataFrame whose first-level columns are identifiers shall have
  61. # them in __dir__.
  62. df = DataFrame(
  63. [list("abcd"), list("efgh")],
  64. columns=pd.MultiIndex.from_tuples(list(zip("ABCD", "EFGH"))),
  65. )
  66. for key in list("ABCD"):
  67. assert key in dir(df)
  68. for key in list("EFGH"):
  69. assert key not in dir(df)
  70. assert isinstance(df.__getitem__("A"), DataFrame)
  71. def test_display_max_dir_items(self):
  72. # display.max_dir_items increaes the number of columns that are in __dir__.
  73. columns = ["a" + str(i) for i in range(420)]
  74. values = [range(420), range(420)]
  75. df = DataFrame(values, columns=columns)
  76. # The default value for display.max_dir_items is 100
  77. assert "a99" in dir(df)
  78. assert "a100" not in dir(df)
  79. with option_context("display.max_dir_items", 300):
  80. df = DataFrame(values, columns=columns)
  81. assert "a299" in dir(df)
  82. assert "a300" not in dir(df)
  83. with option_context("display.max_dir_items", None):
  84. df = DataFrame(values, columns=columns)
  85. assert "a419" in dir(df)
  86. def test_not_hashable(self):
  87. empty_frame = DataFrame()
  88. df = DataFrame([1])
  89. msg = "unhashable type: 'DataFrame'"
  90. with pytest.raises(TypeError, match=msg):
  91. hash(df)
  92. with pytest.raises(TypeError, match=msg):
  93. hash(empty_frame)
  94. def test_column_name_contains_unicode_surrogate(self):
  95. # GH 25509
  96. colname = "\ud83d"
  97. df = DataFrame({colname: []})
  98. # this should not crash
  99. assert colname not in dir(df)
  100. assert df.columns[0] == colname
  101. def test_new_empty_index(self):
  102. df1 = DataFrame(np.random.randn(0, 3))
  103. df2 = DataFrame(np.random.randn(0, 3))
  104. df1.index.name = "foo"
  105. assert df2.index.name is None
  106. def test_get_agg_axis(self, float_frame):
  107. cols = float_frame._get_agg_axis(0)
  108. assert cols is float_frame.columns
  109. idx = float_frame._get_agg_axis(1)
  110. assert idx is float_frame.index
  111. msg = r"Axis must be 0 or 1 \(got 2\)"
  112. with pytest.raises(ValueError, match=msg):
  113. float_frame._get_agg_axis(2)
  114. def test_empty(self, float_frame, float_string_frame):
  115. empty_frame = DataFrame()
  116. assert empty_frame.empty
  117. assert not float_frame.empty
  118. assert not float_string_frame.empty
  119. # corner case
  120. df = DataFrame({"A": [1.0, 2.0, 3.0], "B": ["a", "b", "c"]}, index=np.arange(3))
  121. del df["A"]
  122. assert not df.empty
  123. def test_len(self, float_frame):
  124. assert len(float_frame) == len(float_frame.index)
  125. # single block corner case
  126. arr = float_frame[["A", "B"]].values
  127. expected = float_frame.reindex(columns=["A", "B"]).values
  128. tm.assert_almost_equal(arr, expected)
  129. def test_axis_aliases(self, float_frame):
  130. f = float_frame
  131. # reg name
  132. expected = f.sum(axis=0)
  133. result = f.sum(axis="index")
  134. tm.assert_series_equal(result, expected)
  135. expected = f.sum(axis=1)
  136. result = f.sum(axis="columns")
  137. tm.assert_series_equal(result, expected)
  138. def test_class_axis(self):
  139. # GH 18147
  140. # no exception and no empty docstring
  141. assert pydoc.getdoc(DataFrame.index)
  142. assert pydoc.getdoc(DataFrame.columns)
  143. def test_series_put_names(self, float_string_frame):
  144. series = float_string_frame._series
  145. for k, v in series.items():
  146. assert v.name == k
  147. def test_empty_nonzero(self):
  148. df = DataFrame([1, 2, 3])
  149. assert not df.empty
  150. df = DataFrame(index=[1], columns=[1])
  151. assert not df.empty
  152. df = DataFrame(index=["a", "b"], columns=["c", "d"]).dropna()
  153. assert df.empty
  154. assert df.T.empty
  155. @pytest.mark.parametrize(
  156. "df",
  157. [
  158. DataFrame(),
  159. DataFrame(index=[1]),
  160. DataFrame(columns=[1]),
  161. DataFrame({1: []}),
  162. ],
  163. )
  164. def test_empty_like(self, df):
  165. assert df.empty
  166. assert df.T.empty
  167. def test_with_datetimelikes(self):
  168. df = DataFrame(
  169. {
  170. "A": date_range("20130101", periods=10),
  171. "B": timedelta_range("1 day", periods=10),
  172. }
  173. )
  174. t = df.T
  175. result = t.dtypes.value_counts()
  176. expected = Series({np.dtype("object"): 10}, name="count")
  177. tm.assert_series_equal(result, expected)
  178. def test_deepcopy(self, float_frame):
  179. cp = deepcopy(float_frame)
  180. series = cp["A"]
  181. series[:] = 10
  182. for idx, value in series.items():
  183. assert float_frame["A"][idx] != value
  184. def test_inplace_return_self(self):
  185. # GH 1893
  186. data = DataFrame(
  187. {"a": ["foo", "bar", "baz", "qux"], "b": [0, 0, 1, 1], "c": [1, 2, 3, 4]}
  188. )
  189. def _check_f(base, f):
  190. result = f(base)
  191. assert result is None
  192. # -----DataFrame-----
  193. # set_index
  194. f = lambda x: x.set_index("a", inplace=True)
  195. _check_f(data.copy(), f)
  196. # reset_index
  197. f = lambda x: x.reset_index(inplace=True)
  198. _check_f(data.set_index("a"), f)
  199. # drop_duplicates
  200. f = lambda x: x.drop_duplicates(inplace=True)
  201. _check_f(data.copy(), f)
  202. # sort
  203. f = lambda x: x.sort_values("b", inplace=True)
  204. _check_f(data.copy(), f)
  205. # sort_index
  206. f = lambda x: x.sort_index(inplace=True)
  207. _check_f(data.copy(), f)
  208. # fillna
  209. f = lambda x: x.fillna(0, inplace=True)
  210. _check_f(data.copy(), f)
  211. # replace
  212. f = lambda x: x.replace(1, 0, inplace=True)
  213. _check_f(data.copy(), f)
  214. # rename
  215. f = lambda x: x.rename({1: "foo"}, inplace=True)
  216. _check_f(data.copy(), f)
  217. # -----Series-----
  218. d = data.copy()["c"]
  219. # reset_index
  220. f = lambda x: x.reset_index(inplace=True, drop=True)
  221. _check_f(data.set_index("a")["c"], f)
  222. # fillna
  223. f = lambda x: x.fillna(0, inplace=True)
  224. _check_f(d.copy(), f)
  225. # replace
  226. f = lambda x: x.replace(1, 0, inplace=True)
  227. _check_f(d.copy(), f)
  228. # rename
  229. f = lambda x: x.rename({1: "foo"}, inplace=True)
  230. _check_f(d.copy(), f)
  231. @async_mark()
  232. async def test_tab_complete_warning(self, ip, frame_or_series):
  233. # GH 16409
  234. pytest.importorskip("IPython", minversion="6.0.0")
  235. from IPython.core.completer import provisionalcompleter
  236. if frame_or_series is DataFrame:
  237. code = "from pandas import DataFrame; obj = DataFrame()"
  238. else:
  239. code = "from pandas import Series; obj = Series(dtype=object)"
  240. await ip.run_code(code)
  241. # GH 31324 newer jedi version raises Deprecation warning;
  242. # appears resolved 2021-02-02
  243. with tm.assert_produces_warning(None):
  244. with provisionalcompleter("ignore"):
  245. list(ip.Completer.completions("obj.", 1))
  246. def test_attrs(self):
  247. df = DataFrame({"A": [2, 3]})
  248. assert df.attrs == {}
  249. df.attrs["version"] = 1
  250. result = df.rename(columns=str)
  251. assert result.attrs == {"version": 1}
  252. @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
  253. def test_set_flags(
  254. self, allows_duplicate_labels, frame_or_series, using_copy_on_write
  255. ):
  256. obj = DataFrame({"A": [1, 2]})
  257. key = (0, 0)
  258. if frame_or_series is Series:
  259. obj = obj["A"]
  260. key = 0
  261. result = obj.set_flags(allows_duplicate_labels=allows_duplicate_labels)
  262. if allows_duplicate_labels is None:
  263. # We don't update when it's not provided
  264. assert result.flags.allows_duplicate_labels is True
  265. else:
  266. assert result.flags.allows_duplicate_labels is allows_duplicate_labels
  267. # We made a copy
  268. assert obj is not result
  269. # We didn't mutate obj
  270. assert obj.flags.allows_duplicate_labels is True
  271. # But we didn't copy data
  272. if frame_or_series is Series:
  273. assert np.may_share_memory(obj.values, result.values)
  274. else:
  275. assert np.may_share_memory(obj["A"].values, result["A"].values)
  276. result.iloc[key] = 0
  277. if using_copy_on_write:
  278. assert obj.iloc[key] == 1
  279. else:
  280. assert obj.iloc[key] == 0
  281. # set back to 1 for test below
  282. result.iloc[key] = 1
  283. # Now we do copy.
  284. result = obj.set_flags(
  285. copy=True, allows_duplicate_labels=allows_duplicate_labels
  286. )
  287. result.iloc[key] = 10
  288. assert obj.iloc[key] == 1
  289. def test_constructor_expanddim(self):
  290. # GH#33628 accessing _constructor_expanddim should not raise NotImplementedError
  291. # GH38782 pandas has no container higher than DataFrame (two-dim), so
  292. # DataFrame._constructor_expand_dim, doesn't make sense, so is removed.
  293. df = DataFrame()
  294. msg = "'DataFrame' object has no attribute '_constructor_expanddim'"
  295. with pytest.raises(AttributeError, match=msg):
  296. df._constructor_expanddim(np.arange(27).reshape(3, 3, 3))
  297. @skip_if_no("jinja2")
  298. def test_inspect_getmembers(self):
  299. # GH38740
  300. df = DataFrame()
  301. with tm.assert_produces_warning(None):
  302. inspect.getmembers(df)