test_finalize.py 25 KB


  1. """
  2. An exhaustive list of pandas methods exercising NDFrame.__finalize__.
  3. """
  4. import operator
  5. import re
  6. import numpy as np
  7. import pytest
  8. import pandas as pd
  9. # TODO:
  10. # * Binary methods (mul, div, etc.)
  11. # * Binary outputs (align, etc.)
  12. # * top-level methods (concat, merge, get_dummies, etc.)
  13. # * window
  14. # * cumulative reductions
# Marker for cases that are expected to fail because the behaviour under
# test is not implemented yet.
not_implemented_mark = pytest.mark.xfail(reason="not implemented")

# Two-level MultiIndex (levels "A" and "B") with four entries, shared by the
# MultiIndex-based cases.
mi = pd.MultiIndex.from_product([["a", "b"], [0, 1]], names=["A", "B"])

# Constructor-argument tuples; they are unpacked as ``cls(*args)``.
frame_data = ({"A": [1]},)
frame_mi_data = ({"A": [1, 2, 3, 4]}, mi)
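# Each entry is a 3-tuple of
# - Callable: the constructor (pd.Series or pd.DataFrame)
# - Tuple: the positional arguments passed to that constructor
# - Callable: the operation under test; it is called with the constructed
#   object once ``attrs`` has been set on it.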
  23. _all_methods = [
  24. (
  25. pd.Series,
  26. (np.array([0], dtype="float64")),
  27. operator.methodcaller("view", "int64"),
  28. ),
  29. (pd.Series, ([0],), operator.methodcaller("take", [])),
  30. (pd.Series, ([0],), operator.methodcaller("__getitem__", [True])),
  31. (pd.Series, ([0],), operator.methodcaller("repeat", 2)),
  32. (pd.Series, ([0],), operator.methodcaller("reset_index")),
  33. (pd.Series, ([0],), operator.methodcaller("reset_index", drop=True)),
  34. (pd.Series, ([0],), operator.methodcaller("to_frame")),
  35. (pd.Series, ([0, 0],), operator.methodcaller("drop_duplicates")),
  36. (pd.Series, ([0, 0],), operator.methodcaller("duplicated")),
  37. (pd.Series, ([0, 0],), operator.methodcaller("round")),
  38. (pd.Series, ([0, 0],), operator.methodcaller("rename", lambda x: x + 1)),
  39. (pd.Series, ([0, 0],), operator.methodcaller("rename", "name")),
  40. (pd.Series, ([0, 0],), operator.methodcaller("set_axis", ["a", "b"])),
  41. (pd.Series, ([0, 0],), operator.methodcaller("reindex", [1, 0])),
  42. (pd.Series, ([0, 0],), operator.methodcaller("drop", [0])),
  43. (pd.Series, (pd.array([0, pd.NA]),), operator.methodcaller("fillna", 0)),
  44. (pd.Series, ([0, 0],), operator.methodcaller("replace", {0: 1})),
  45. (pd.Series, ([0, 0],), operator.methodcaller("shift")),
  46. (pd.Series, ([0, 0],), operator.methodcaller("isin", [0, 1])),
  47. (pd.Series, ([0, 0],), operator.methodcaller("between", 0, 2)),
  48. (pd.Series, ([0, 0],), operator.methodcaller("isna")),
  49. (pd.Series, ([0, 0],), operator.methodcaller("isnull")),
  50. (pd.Series, ([0, 0],), operator.methodcaller("notna")),
  51. (pd.Series, ([0, 0],), operator.methodcaller("notnull")),
  52. (pd.Series, ([1],), operator.methodcaller("add", pd.Series([1]))),
  53. # TODO: mul, div, etc.
  54. (
  55. pd.Series,
  56. ([0], pd.period_range("2000", periods=1)),
  57. operator.methodcaller("to_timestamp"),
  58. ),
  59. (
  60. pd.Series,
  61. ([0], pd.date_range("2000", periods=1)),
  62. operator.methodcaller("to_period"),
  63. ),
  64. pytest.param(
  65. (
  66. pd.DataFrame,
  67. frame_data,
  68. operator.methodcaller("dot", pd.DataFrame(index=["A"])),
  69. ),
  70. marks=pytest.mark.xfail(reason="Implement binary finalize"),
  71. ),
  72. (pd.DataFrame, frame_data, operator.methodcaller("transpose")),
  73. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", "A")),
  74. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", ["A"])),
  75. (pd.DataFrame, frame_data, operator.methodcaller("__getitem__", np.array([True]))),
  76. (pd.DataFrame, ({("A", "a"): [1]},), operator.methodcaller("__getitem__", ["A"])),
  77. (pd.DataFrame, frame_data, operator.methodcaller("query", "A == 1")),
  78. (pd.DataFrame, frame_data, operator.methodcaller("eval", "A + 1", engine="python")),
  79. (pd.DataFrame, frame_data, operator.methodcaller("select_dtypes", include="int")),
  80. (pd.DataFrame, frame_data, operator.methodcaller("assign", b=1)),
  81. (pd.DataFrame, frame_data, operator.methodcaller("set_axis", ["A"])),
  82. (pd.DataFrame, frame_data, operator.methodcaller("reindex", [0, 1])),
  83. (pd.DataFrame, frame_data, operator.methodcaller("drop", columns=["A"])),
  84. (pd.DataFrame, frame_data, operator.methodcaller("drop", index=[0])),
  85. (pd.DataFrame, frame_data, operator.methodcaller("rename", columns={"A": "a"})),
  86. (pd.DataFrame, frame_data, operator.methodcaller("rename", index=lambda x: x)),
  87. (pd.DataFrame, frame_data, operator.methodcaller("fillna", "A")),
  88. (pd.DataFrame, frame_data, operator.methodcaller("fillna", method="ffill")),
  89. (pd.DataFrame, frame_data, operator.methodcaller("set_index", "A")),
  90. (pd.DataFrame, frame_data, operator.methodcaller("reset_index")),
  91. (pd.DataFrame, frame_data, operator.methodcaller("isna")),
  92. (pd.DataFrame, frame_data, operator.methodcaller("isnull")),
  93. (pd.DataFrame, frame_data, operator.methodcaller("notna")),
  94. (pd.DataFrame, frame_data, operator.methodcaller("notnull")),
  95. (pd.DataFrame, frame_data, operator.methodcaller("dropna")),
  96. (pd.DataFrame, frame_data, operator.methodcaller("drop_duplicates")),
  97. (pd.DataFrame, frame_data, operator.methodcaller("duplicated")),
  98. (pd.DataFrame, frame_data, operator.methodcaller("sort_values", by="A")),
  99. (pd.DataFrame, frame_data, operator.methodcaller("sort_index")),
  100. (pd.DataFrame, frame_data, operator.methodcaller("nlargest", 1, "A")),
  101. (pd.DataFrame, frame_data, operator.methodcaller("nsmallest", 1, "A")),
  102. (pd.DataFrame, frame_mi_data, operator.methodcaller("swaplevel")),
  103. (
  104. pd.DataFrame,
  105. frame_data,
  106. operator.methodcaller("add", pd.DataFrame(*frame_data)),
  107. ),
  108. # TODO: div, mul, etc.
  109. pytest.param(
  110. (
  111. pd.DataFrame,
  112. frame_data,
  113. operator.methodcaller("combine", pd.DataFrame(*frame_data), operator.add),
  114. ),
  115. marks=not_implemented_mark,
  116. ),
  117. pytest.param(
  118. (
  119. pd.DataFrame,
  120. frame_data,
  121. operator.methodcaller("combine_first", pd.DataFrame(*frame_data)),
  122. ),
  123. marks=not_implemented_mark,
  124. ),
  125. pytest.param(
  126. (
  127. pd.DataFrame,
  128. frame_data,
  129. operator.methodcaller("update", pd.DataFrame(*frame_data)),
  130. ),
  131. marks=not_implemented_mark,
  132. ),
  133. (pd.DataFrame, frame_data, operator.methodcaller("pivot", columns="A")),
  134. (
  135. pd.DataFrame,
  136. ({"A": [1], "B": [1]},),
  137. operator.methodcaller("pivot_table", columns="A"),
  138. ),
  139. (
  140. pd.DataFrame,
  141. ({"A": [1], "B": [1]},),
  142. operator.methodcaller("pivot_table", columns="A", aggfunc=["mean", "sum"]),
  143. ),
  144. (pd.DataFrame, frame_data, operator.methodcaller("stack")),
  145. (pd.DataFrame, frame_data, operator.methodcaller("explode", "A")),
  146. (pd.DataFrame, frame_mi_data, operator.methodcaller("unstack")),
  147. (
  148. pd.DataFrame,
  149. ({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]},),
  150. operator.methodcaller("melt", id_vars=["A"], value_vars=["B"]),
  151. ),
  152. pytest.param(
  153. (pd.DataFrame, frame_data, operator.methodcaller("applymap", lambda x: x))
  154. ),
  155. pytest.param(
  156. (
  157. pd.DataFrame,
  158. frame_data,
  159. operator.methodcaller("merge", pd.DataFrame({"A": [1]})),
  160. ),
  161. marks=not_implemented_mark,
  162. ),
  163. pytest.param(
  164. (pd.DataFrame, frame_data, operator.methodcaller("round", 2)),
  165. ),
  166. (pd.DataFrame, frame_data, operator.methodcaller("corr")),
  167. pytest.param(
  168. (pd.DataFrame, frame_data, operator.methodcaller("cov")),
  169. marks=[
  170. pytest.mark.filterwarnings("ignore::RuntimeWarning"),
  171. ],
  172. ),
  173. pytest.param(
  174. (
  175. pd.DataFrame,
  176. frame_data,
  177. operator.methodcaller("corrwith", pd.DataFrame(*frame_data)),
  178. ),
  179. marks=not_implemented_mark,
  180. ),
  181. pytest.param(
  182. (pd.DataFrame, frame_data, operator.methodcaller("count")),
  183. ),
  184. pytest.param(
  185. (pd.DataFrame, frame_data, operator.methodcaller("nunique")),
  186. ),
  187. (pd.DataFrame, frame_data, operator.methodcaller("idxmin")),
  188. (pd.DataFrame, frame_data, operator.methodcaller("idxmax")),
  189. (pd.DataFrame, frame_data, operator.methodcaller("mode")),
  190. pytest.param(
  191. (pd.Series, [0], operator.methodcaller("mode")),
  192. marks=not_implemented_mark,
  193. ),
  194. pytest.param(
  195. (
  196. pd.DataFrame,
  197. frame_data,
  198. operator.methodcaller("quantile", numeric_only=True),
  199. ),
  200. ),
  201. pytest.param(
  202. (
  203. pd.DataFrame,
  204. frame_data,
  205. operator.methodcaller("quantile", q=[0.25, 0.75], numeric_only=True),
  206. ),
  207. ),
  208. pytest.param(
  209. (
  210. pd.DataFrame,
  211. ({"A": [pd.Timedelta(days=1), pd.Timedelta(days=2)]},),
  212. operator.methodcaller("quantile", numeric_only=False),
  213. ),
  214. ),
  215. pytest.param(
  216. (
  217. pd.DataFrame,
  218. ({"A": [np.datetime64("2022-01-01"), np.datetime64("2022-01-02")]},),
  219. operator.methodcaller("quantile", numeric_only=True),
  220. ),
  221. ),
  222. (
  223. pd.DataFrame,
  224. ({"A": [1]}, [pd.Period("2000", "D")]),
  225. operator.methodcaller("to_timestamp"),
  226. ),
  227. (
  228. pd.DataFrame,
  229. ({"A": [1]}, [pd.Timestamp("2000")]),
  230. operator.methodcaller("to_period", freq="D"),
  231. ),
  232. pytest.param(
  233. (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", [1])),
  234. ),
  235. pytest.param(
  236. (pd.DataFrame, frame_mi_data, operator.methodcaller("isin", pd.Series([1]))),
  237. ),
  238. pytest.param(
  239. (
  240. pd.DataFrame,
  241. frame_mi_data,
  242. operator.methodcaller("isin", pd.DataFrame({"A": [1]})),
  243. ),
  244. ),
  245. (pd.DataFrame, frame_data, operator.methodcaller("swapaxes", 0, 1)),
  246. (pd.DataFrame, frame_mi_data, operator.methodcaller("droplevel", "A")),
  247. (pd.DataFrame, frame_data, operator.methodcaller("pop", "A")),
  248. pytest.param(
  249. (pd.DataFrame, frame_data, operator.methodcaller("squeeze")),
  250. marks=not_implemented_mark,
  251. ),
  252. (pd.Series, ([1, 2],), operator.methodcaller("squeeze")),
  253. (pd.Series, ([1, 2],), operator.methodcaller("rename_axis", index="a")),
  254. (pd.DataFrame, frame_data, operator.methodcaller("rename_axis", columns="a")),
  255. # Unary ops
  256. (pd.DataFrame, frame_data, operator.neg),
  257. (pd.Series, [1], operator.neg),
  258. (pd.DataFrame, frame_data, operator.pos),
  259. (pd.Series, [1], operator.pos),
  260. (pd.DataFrame, frame_data, operator.inv),
  261. (pd.Series, [1], operator.inv),
  262. (pd.DataFrame, frame_data, abs),
  263. (pd.Series, [1], abs),
  264. pytest.param((pd.DataFrame, frame_data, round)),
  265. (pd.Series, [1], round),
  266. (pd.DataFrame, frame_data, operator.methodcaller("take", [0, 0])),
  267. (pd.DataFrame, frame_mi_data, operator.methodcaller("xs", "a")),
  268. (pd.Series, (1, mi), operator.methodcaller("xs", "a")),
  269. (pd.DataFrame, frame_data, operator.methodcaller("get", "A")),
  270. (
  271. pd.DataFrame,
  272. frame_data,
  273. operator.methodcaller("reindex_like", pd.DataFrame({"A": [1, 2, 3]})),
  274. ),
  275. (
  276. pd.Series,
  277. frame_data,
  278. operator.methodcaller("reindex_like", pd.Series([0, 1, 2])),
  279. ),
  280. (pd.DataFrame, frame_data, operator.methodcaller("add_prefix", "_")),
  281. (pd.DataFrame, frame_data, operator.methodcaller("add_suffix", "_")),
  282. (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_prefix", "_")),
  283. (pd.Series, (1, ["a", "b"]), operator.methodcaller("add_suffix", "_")),
  284. (pd.Series, ([3, 2],), operator.methodcaller("sort_values")),
  285. (pd.Series, ([1] * 10,), operator.methodcaller("head")),
  286. (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("head")),
  287. (pd.Series, ([1] * 10,), operator.methodcaller("tail")),
  288. (pd.DataFrame, ({"A": [1] * 10},), operator.methodcaller("tail")),
  289. (pd.Series, ([1, 2],), operator.methodcaller("sample", n=2, replace=True)),
  290. (pd.DataFrame, (frame_data,), operator.methodcaller("sample", n=2, replace=True)),
  291. (pd.Series, ([1, 2],), operator.methodcaller("astype", float)),
  292. (pd.DataFrame, frame_data, operator.methodcaller("astype", float)),
  293. (pd.Series, ([1, 2],), operator.methodcaller("copy")),
  294. (pd.DataFrame, frame_data, operator.methodcaller("copy")),
  295. (pd.Series, ([1, 2], None, object), operator.methodcaller("infer_objects")),
  296. (
  297. pd.DataFrame,
  298. ({"A": np.array([1, 2], dtype=object)},),
  299. operator.methodcaller("infer_objects"),
  300. ),
  301. (pd.Series, ([1, 2],), operator.methodcaller("convert_dtypes")),
  302. (pd.DataFrame, frame_data, operator.methodcaller("convert_dtypes")),
  303. (pd.Series, ([1, None, 3],), operator.methodcaller("interpolate")),
  304. (pd.DataFrame, ({"A": [1, None, 3]},), operator.methodcaller("interpolate")),
  305. (pd.Series, ([1, 2],), operator.methodcaller("clip", lower=1)),
  306. (pd.DataFrame, frame_data, operator.methodcaller("clip", lower=1)),
  307. (
  308. pd.Series,
  309. (1, pd.date_range("2000", periods=4)),
  310. operator.methodcaller("asfreq", "H"),
  311. ),
  312. (
  313. pd.DataFrame,
  314. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  315. operator.methodcaller("asfreq", "H"),
  316. ),
  317. (
  318. pd.Series,
  319. (1, pd.date_range("2000", periods=4)),
  320. operator.methodcaller("at_time", "12:00"),
  321. ),
  322. (
  323. pd.DataFrame,
  324. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  325. operator.methodcaller("at_time", "12:00"),
  326. ),
  327. (
  328. pd.Series,
  329. (1, pd.date_range("2000", periods=4)),
  330. operator.methodcaller("between_time", "12:00", "13:00"),
  331. ),
  332. (
  333. pd.DataFrame,
  334. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  335. operator.methodcaller("between_time", "12:00", "13:00"),
  336. ),
  337. (
  338. pd.Series,
  339. (1, pd.date_range("2000", periods=4)),
  340. operator.methodcaller("first", "3D"),
  341. ),
  342. (
  343. pd.DataFrame,
  344. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  345. operator.methodcaller("first", "3D"),
  346. ),
  347. (
  348. pd.Series,
  349. (1, pd.date_range("2000", periods=4)),
  350. operator.methodcaller("last", "3D"),
  351. ),
  352. (
  353. pd.DataFrame,
  354. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  355. operator.methodcaller("last", "3D"),
  356. ),
  357. (pd.Series, ([1, 2],), operator.methodcaller("rank")),
  358. (pd.DataFrame, frame_data, operator.methodcaller("rank")),
  359. (pd.Series, ([1, 2],), operator.methodcaller("where", np.array([True, False]))),
  360. (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))),
  361. (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))),
  362. (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))),
  363. (pd.Series, ([1, 2],), operator.methodcaller("truncate", before=0)),
  364. (pd.DataFrame, frame_data, operator.methodcaller("truncate", before=0)),
  365. (
  366. pd.Series,
  367. (1, pd.date_range("2000", periods=4, tz="UTC")),
  368. operator.methodcaller("tz_convert", "CET"),
  369. ),
  370. (
  371. pd.DataFrame,
  372. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4, tz="UTC")),
  373. operator.methodcaller("tz_convert", "CET"),
  374. ),
  375. (
  376. pd.Series,
  377. (1, pd.date_range("2000", periods=4)),
  378. operator.methodcaller("tz_localize", "CET"),
  379. ),
  380. (
  381. pd.DataFrame,
  382. ({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
  383. operator.methodcaller("tz_localize", "CET"),
  384. ),
  385. pytest.param(
  386. (pd.Series, ([1, 2],), operator.methodcaller("describe")),
  387. marks=not_implemented_mark,
  388. ),
  389. pytest.param(
  390. (pd.DataFrame, frame_data, operator.methodcaller("describe")),
  391. marks=not_implemented_mark,
  392. ),
  393. (pd.Series, ([1, 2],), operator.methodcaller("pct_change")),
  394. (pd.DataFrame, frame_data, operator.methodcaller("pct_change")),
  395. (pd.Series, ([1],), operator.methodcaller("transform", lambda x: x - x.min())),
  396. pytest.param(
  397. (
  398. pd.DataFrame,
  399. frame_mi_data,
  400. operator.methodcaller("transform", lambda x: x - x.min()),
  401. ),
  402. ),
  403. (pd.Series, ([1],), operator.methodcaller("apply", lambda x: x)),
  404. pytest.param(
  405. (pd.DataFrame, frame_mi_data, operator.methodcaller("apply", lambda x: x)),
  406. ),
  407. # Cumulative reductions
  408. (pd.Series, ([1],), operator.methodcaller("cumsum")),
  409. (pd.DataFrame, frame_data, operator.methodcaller("cumsum")),
  410. # Reductions
  411. pytest.param(
  412. (pd.DataFrame, frame_data, operator.methodcaller("any")),
  413. marks=not_implemented_mark,
  414. ),
  415. pytest.param(
  416. (pd.DataFrame, frame_data, operator.methodcaller("sum")),
  417. marks=not_implemented_mark,
  418. ),
  419. pytest.param(
  420. (pd.DataFrame, frame_data, operator.methodcaller("std")),
  421. marks=not_implemented_mark,
  422. ),
  423. pytest.param(
  424. (pd.DataFrame, frame_data, operator.methodcaller("mean")),
  425. marks=not_implemented_mark,
  426. ),
  427. ]
  428. def idfn(x):
  429. xpr = re.compile(r"'(.*)?'")
  430. m = xpr.search(str(x))
  431. if m:
  432. return m.group(1)
  433. else:
  434. return str(x)
  435. @pytest.fixture(params=_all_methods, ids=lambda x: idfn(x[-1]))
  436. def ndframe_method(request):
  437. """
  438. An NDFrame method returning an NDFrame.
  439. """
  440. return request.param
  441. def test_finalize_called(ndframe_method):
  442. cls, init_args, method = ndframe_method
  443. ndframe = cls(*init_args)
  444. ndframe.attrs = {"a": 1}
  445. result = method(ndframe)
  446. assert result.attrs == {"a": 1}
  447. @not_implemented_mark
  448. def test_finalize_called_eval_numexpr():
  449. pytest.importorskip("numexpr")
  450. df = pd.DataFrame({"A": [1, 2]})
  451. df.attrs["A"] = 1
  452. result = df.eval("A + 1", engine="numexpr")
  453. assert result.attrs == {"A": 1}
  454. # ----------------------------------------------------------------------------
  455. # Binary operations
  456. @pytest.mark.parametrize("annotate", ["left", "right", "both"])
  457. @pytest.mark.parametrize(
  458. "args",
  459. [
  460. (1, pd.Series([1])),
  461. (1, pd.DataFrame({"A": [1]})),
  462. (pd.Series([1]), 1),
  463. (pd.DataFrame({"A": [1]}), 1),
  464. (pd.Series([1]), pd.Series([1])),
  465. (pd.DataFrame({"A": [1]}), pd.DataFrame({"A": [1]})),
  466. (pd.Series([1]), pd.DataFrame({"A": [1]})),
  467. (pd.DataFrame({"A": [1]}), pd.Series([1])),
  468. ],
  469. )
  470. def test_binops(request, args, annotate, all_binary_operators):
  471. # This generates 624 tests... Is that needed?
  472. left, right = args
  473. if annotate == "both" and isinstance(left, int) or isinstance(right, int):
  474. return
  475. if annotate in {"left", "both"} and not isinstance(left, int):
  476. left.attrs = {"a": 1}
  477. if annotate in {"left", "both"} and not isinstance(right, int):
  478. right.attrs = {"a": 1}
  479. is_cmp = all_binary_operators in [
  480. operator.eq,
  481. operator.ne,
  482. operator.gt,
  483. operator.ge,
  484. operator.lt,
  485. operator.le,
  486. ]
  487. if is_cmp and isinstance(left, pd.DataFrame) and isinstance(right, pd.Series):
  488. # in 2.0 silent alignment on comparisons was removed xref GH#28759
  489. left, right = left.align(right, axis=1, copy=False)
  490. elif is_cmp and isinstance(left, pd.Series) and isinstance(right, pd.DataFrame):
  491. right, left = right.align(left, axis=1, copy=False)
  492. result = all_binary_operators(left, right)
  493. assert result.attrs == {"a": 1}
  494. # ----------------------------------------------------------------------------
  495. # Accessors
  496. @pytest.mark.parametrize(
  497. "method",
  498. [
  499. operator.methodcaller("capitalize"),
  500. operator.methodcaller("casefold"),
  501. operator.methodcaller("cat", ["a"]),
  502. operator.methodcaller("contains", "a"),
  503. operator.methodcaller("count", "a"),
  504. operator.methodcaller("encode", "utf-8"),
  505. operator.methodcaller("endswith", "a"),
  506. operator.methodcaller("extract", r"(\w)(\d)"),
  507. operator.methodcaller("extract", r"(\w)(\d)", expand=False),
  508. operator.methodcaller("find", "a"),
  509. operator.methodcaller("findall", "a"),
  510. operator.methodcaller("get", 0),
  511. operator.methodcaller("index", "a"),
  512. operator.methodcaller("len"),
  513. operator.methodcaller("ljust", 4),
  514. operator.methodcaller("lower"),
  515. operator.methodcaller("lstrip"),
  516. operator.methodcaller("match", r"\w"),
  517. operator.methodcaller("normalize", "NFC"),
  518. operator.methodcaller("pad", 4),
  519. operator.methodcaller("partition", "a"),
  520. operator.methodcaller("repeat", 2),
  521. operator.methodcaller("replace", "a", "b"),
  522. operator.methodcaller("rfind", "a"),
  523. operator.methodcaller("rindex", "a"),
  524. operator.methodcaller("rjust", 4),
  525. operator.methodcaller("rpartition", "a"),
  526. operator.methodcaller("rstrip"),
  527. operator.methodcaller("slice", 4),
  528. operator.methodcaller("slice_replace", 1, repl="a"),
  529. operator.methodcaller("startswith", "a"),
  530. operator.methodcaller("strip"),
  531. operator.methodcaller("swapcase"),
  532. operator.methodcaller("translate", {"a": "b"}),
  533. operator.methodcaller("upper"),
  534. operator.methodcaller("wrap", 4),
  535. operator.methodcaller("zfill", 4),
  536. operator.methodcaller("isalnum"),
  537. operator.methodcaller("isalpha"),
  538. operator.methodcaller("isdigit"),
  539. operator.methodcaller("isspace"),
  540. operator.methodcaller("islower"),
  541. operator.methodcaller("isupper"),
  542. operator.methodcaller("istitle"),
  543. operator.methodcaller("isnumeric"),
  544. operator.methodcaller("isdecimal"),
  545. operator.methodcaller("get_dummies"),
  546. ],
  547. ids=idfn,
  548. )
  549. def test_string_method(method):
  550. s = pd.Series(["a1"])
  551. s.attrs = {"a": 1}
  552. result = method(s.str)
  553. assert result.attrs == {"a": 1}
  554. @pytest.mark.parametrize(
  555. "method",
  556. [
  557. operator.methodcaller("to_period"),
  558. operator.methodcaller("tz_localize", "CET"),
  559. operator.methodcaller("normalize"),
  560. operator.methodcaller("strftime", "%Y"),
  561. operator.methodcaller("round", "H"),
  562. operator.methodcaller("floor", "H"),
  563. operator.methodcaller("ceil", "H"),
  564. operator.methodcaller("month_name"),
  565. operator.methodcaller("day_name"),
  566. ],
  567. ids=idfn,
  568. )
  569. def test_datetime_method(method):
  570. s = pd.Series(pd.date_range("2000", periods=4))
  571. s.attrs = {"a": 1}
  572. result = method(s.dt)
  573. assert result.attrs == {"a": 1}
  574. @pytest.mark.parametrize(
  575. "attr",
  576. [
  577. "date",
  578. "time",
  579. "timetz",
  580. "year",
  581. "month",
  582. "day",
  583. "hour",
  584. "minute",
  585. "second",
  586. "microsecond",
  587. "nanosecond",
  588. "dayofweek",
  589. "day_of_week",
  590. "dayofyear",
  591. "day_of_year",
  592. "quarter",
  593. "is_month_start",
  594. "is_month_end",
  595. "is_quarter_start",
  596. "is_quarter_end",
  597. "is_year_start",
  598. "is_year_end",
  599. "is_leap_year",
  600. "daysinmonth",
  601. "days_in_month",
  602. ],
  603. )
  604. def test_datetime_property(attr):
  605. s = pd.Series(pd.date_range("2000", periods=4))
  606. s.attrs = {"a": 1}
  607. result = getattr(s.dt, attr)
  608. assert result.attrs == {"a": 1}
  609. @pytest.mark.parametrize(
  610. "attr", ["days", "seconds", "microseconds", "nanoseconds", "components"]
  611. )
  612. def test_timedelta_property(attr):
  613. s = pd.Series(pd.timedelta_range("2000", periods=4))
  614. s.attrs = {"a": 1}
  615. result = getattr(s.dt, attr)
  616. assert result.attrs == {"a": 1}
  617. @pytest.mark.parametrize("method", [operator.methodcaller("total_seconds")])
  618. def test_timedelta_methods(method):
  619. s = pd.Series(pd.timedelta_range("2000", periods=4))
  620. s.attrs = {"a": 1}
  621. result = method(s.dt)
  622. assert result.attrs == {"a": 1}
  623. @pytest.mark.parametrize(
  624. "method",
  625. [
  626. operator.methodcaller("add_categories", ["c"]),
  627. operator.methodcaller("as_ordered"),
  628. operator.methodcaller("as_unordered"),
  629. lambda x: getattr(x, "codes"),
  630. operator.methodcaller("remove_categories", "a"),
  631. operator.methodcaller("remove_unused_categories"),
  632. operator.methodcaller("rename_categories", {"a": "A", "b": "B"}),
  633. operator.methodcaller("reorder_categories", ["b", "a"]),
  634. operator.methodcaller("set_categories", ["A", "B"]),
  635. ],
  636. )
  637. @not_implemented_mark
  638. def test_categorical_accessor(method):
  639. s = pd.Series(["a", "b"], dtype="category")
  640. s.attrs = {"a": 1}
  641. result = method(s.cat)
  642. assert result.attrs == {"a": 1}
  643. # ----------------------------------------------------------------------------
  644. # Groupby
  645. @pytest.mark.parametrize(
  646. "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
  647. )
  648. @pytest.mark.parametrize(
  649. "method",
  650. [
  651. operator.methodcaller("sum"),
  652. lambda x: x.apply(lambda y: y),
  653. lambda x: x.agg("sum"),
  654. lambda x: x.agg("mean"),
  655. lambda x: x.agg("median"),
  656. ],
  657. )
  658. def test_groupby_finalize(obj, method):
  659. obj.attrs = {"a": 1}
  660. result = method(obj.groupby([0, 0], group_keys=False))
  661. assert result.attrs == {"a": 1}
  662. @pytest.mark.parametrize(
  663. "obj", [pd.Series([0, 0]), pd.DataFrame({"A": [0, 1], "B": [1, 2]})]
  664. )
  665. @pytest.mark.parametrize(
  666. "method",
  667. [
  668. lambda x: x.agg(["sum", "count"]),
  669. lambda x: x.agg("std"),
  670. lambda x: x.agg("var"),
  671. lambda x: x.agg("sem"),
  672. lambda x: x.agg("size"),
  673. lambda x: x.agg("ohlc"),
  674. lambda x: x.agg("describe"),
  675. ],
  676. )
  677. @not_implemented_mark
  678. def test_groupby_finalize_not_implemented(obj, method):
  679. obj.attrs = {"a": 1}
  680. result = method(obj.groupby([0, 0]))
  681. assert result.attrs == {"a": 1}
  682. def test_finalize_frame_series_name():
  683. # https://github.com/pandas-dev/pandas/pull/37186/files#r506978889
  684. # ensure we don't copy the column `name` to the Series.
  685. df = pd.DataFrame({"name": [1, 2]})
  686. result = pd.Series([1, 2]).__finalize__(df)
  687. assert result.name is None