# test_frame_apply.py

from datetime import datetime
import warnings

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas as pd
from pandas import (
    DataFrame,
    MultiIndex,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm
from pandas.tests.frame.common import zip_frames
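

# Note: fixtures such as float_frame, int_frame_const_col, mixed_type_frame,
# axis, using_array_manager and using_copy_on_write used below are assumed to
# be provided by the surrounding test suite's conftest.py; they are not
# defined in this module.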


def test_apply(float_frame):
    with np.errstate(all="ignore"):
        # ufunc
        result = np.sqrt(float_frame["A"])
        expected = float_frame.apply(np.sqrt)["A"]
        tm.assert_series_equal(result, expected)

        # aggregator
        result = float_frame.apply(np.mean)["A"]
        expected = np.mean(float_frame["A"])
        assert result == expected

        d = float_frame.index[0]
        result = float_frame.apply(np.mean, axis=1)
        expected = np.mean(float_frame.xs(d))
        assert result[d] == expected
        assert result.index is float_frame.index


def test_apply_categorical_func():
    # GH 9573
    df = DataFrame({"c0": ["A", "A", "B", "B"], "c1": ["C", "C", "D", "D"]})
    result = df.apply(lambda ts: ts.astype("category"))

    assert result.shape == (4, 2)
    assert isinstance(result["c0"].dtype, CategoricalDtype)
    assert isinstance(result["c1"].dtype, CategoricalDtype)


def test_apply_axis1_with_ea():
    # GH#36785
    expected = DataFrame({"A": [Timestamp("2013-01-01", tz="UTC")]})
    result = expected.apply(lambda x: x, axis=1)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "data, dtype",
    [(1, None), (1, CategoricalDtype([1])), (Timestamp("2013-01-01", tz="UTC"), None)],
)
def test_agg_axis1_duplicate_index(data, dtype):
    # GH 42380
    expected = DataFrame([[data], [data]], index=["a", "a"], dtype=dtype)
    result = expected.agg(lambda x: x, axis=1)
    tm.assert_frame_equal(result, expected)


def test_apply_mixed_datetimelike():
    # mixed datetimelike
    # GH 7778
    expected = DataFrame(
        {
            "A": date_range("20130101", periods=3),
            "B": pd.to_timedelta(np.arange(3), unit="s"),
        }
    )
    result = expected.apply(lambda x: x, axis=1)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("func", [np.sqrt, np.mean])
def test_apply_empty(func):
    # empty
    empty_frame = DataFrame()
    result = empty_frame.apply(func)
    assert result.empty


def test_apply_float_frame(float_frame):
    no_rows = float_frame[:0]
    result = no_rows.apply(lambda x: x.mean())
    expected = Series(np.nan, index=float_frame.columns)
    tm.assert_series_equal(result, expected)

    no_cols = float_frame.loc[:, []]
    result = no_cols.apply(lambda x: x.mean(), axis=1)
    expected = Series(np.nan, index=float_frame.index)
    tm.assert_series_equal(result, expected)


def test_apply_empty_except_index():
    # GH 2476
    expected = DataFrame(index=["a"])
    result = expected.apply(lambda x: x["a"], axis=1)
    tm.assert_frame_equal(result, expected)


def test_apply_with_reduce_empty():
    # reduce with an empty DataFrame
    empty_frame = DataFrame()

    x = []
    result = empty_frame.apply(x.append, axis=1, result_type="expand")
    tm.assert_frame_equal(result, empty_frame)
    result = empty_frame.apply(x.append, axis=1, result_type="reduce")
    expected = Series([], dtype=np.float64)
    tm.assert_series_equal(result, expected)

    empty_with_cols = DataFrame(columns=["a", "b", "c"])
    result = empty_with_cols.apply(x.append, axis=1, result_type="expand")
    tm.assert_frame_equal(result, empty_with_cols)
    result = empty_with_cols.apply(x.append, axis=1, result_type="reduce")
    expected = Series([], dtype=np.float64)
    tm.assert_series_equal(result, expected)

    # Ensure that x.append hasn't been called
    assert x == []


@pytest.mark.parametrize("func", ["sum", "prod", "any", "all"])
def test_apply_funcs_over_empty(func):
    # GH 28213
    df = DataFrame(columns=["a", "b", "c"])

    result = df.apply(getattr(np, func))
    expected = getattr(df, func)()
    if func in ("sum", "prod"):
        expected = expected.astype(float)
    tm.assert_series_equal(result, expected)


def test_nunique_empty():
    # GH 28213
    df = DataFrame(columns=["a", "b", "c"])

    result = df.nunique()
    expected = Series(0, index=df.columns)
    tm.assert_series_equal(result, expected)

    result = df.T.nunique()
    expected = Series([], dtype=np.float64)
    tm.assert_series_equal(result, expected)


def test_apply_standard_nonunique():
    df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["a", "a", "c"])

    result = df.apply(lambda s: s[0], axis=1)
    expected = Series([1, 4, 7], ["a", "a", "c"])
    tm.assert_series_equal(result, expected)

    result = df.T.apply(lambda s: s[0], axis=0)
    tm.assert_series_equal(result, expected)
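

# The broadcast tests below exercise result_type="broadcast": the result of
# the applied function is broadcast back to the original shape of the frame,
# and the original index and columns are retained.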


def test_apply_broadcast_scalars(float_frame):
    # scalars
    result = float_frame.apply(np.mean, result_type="broadcast")
    expected = DataFrame([float_frame.mean()], index=float_frame.index)
    tm.assert_frame_equal(result, expected)


def test_apply_broadcast_scalars_axis1(float_frame):
    result = float_frame.apply(np.mean, axis=1, result_type="broadcast")
    m = float_frame.mean(axis=1)
    expected = DataFrame({c: m for c in float_frame.columns})
    tm.assert_frame_equal(result, expected)


def test_apply_broadcast_lists_columns(float_frame):
    # lists
    result = float_frame.apply(
        lambda x: list(range(len(float_frame.columns))),
        axis=1,
        result_type="broadcast",
    )
    m = list(range(len(float_frame.columns)))
    expected = DataFrame(
        [m] * len(float_frame.index),
        dtype="float64",
        index=float_frame.index,
        columns=float_frame.columns,
    )
    tm.assert_frame_equal(result, expected)


def test_apply_broadcast_lists_index(float_frame):
    result = float_frame.apply(
        lambda x: list(range(len(float_frame.index))), result_type="broadcast"
    )
    m = list(range(len(float_frame.index)))
    expected = DataFrame(
        {c: m for c in float_frame.columns},
        dtype="float64",
        index=float_frame.index,
    )
    tm.assert_frame_equal(result, expected)


def test_apply_broadcast_list_lambda_func(int_frame_const_col):
    # preserve columns
    df = int_frame_const_col
    result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast")
    tm.assert_frame_equal(result, df)


def test_apply_broadcast_series_lambda_func(int_frame_const_col):
    df = int_frame_const_col
    result = df.apply(
        lambda x: Series([1, 2, 3], index=list("abc")),
        axis=1,
        result_type="broadcast",
    )
    expected = df.copy()
    tm.assert_frame_equal(result, expected)
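

# With raw=True the applied function receives a 1-D NumPy ndarray for each
# row or column instead of a Series, avoiding Series construction overhead.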


@pytest.mark.parametrize("axis", [0, 1])
def test_apply_raw_float_frame(float_frame, axis):
    def _assert_raw(x):
        assert isinstance(x, np.ndarray)
        assert x.ndim == 1

    float_frame.apply(_assert_raw, axis=axis, raw=True)


@pytest.mark.parametrize("axis", [0, 1])
def test_apply_raw_float_frame_lambda(float_frame, axis):
    result = float_frame.apply(np.mean, axis=axis, raw=True)
    expected = float_frame.apply(lambda x: x.values.mean(), axis=axis)
    tm.assert_series_equal(result, expected)


def test_apply_raw_float_frame_no_reduction(float_frame):
    # no reduction
    result = float_frame.apply(lambda x: x * 2, raw=True)
    expected = float_frame * 2
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("axis", [0, 1])
def test_apply_raw_mixed_type_frame(mixed_type_frame, axis):
    def _assert_raw(x):
        assert isinstance(x, np.ndarray)
        assert x.ndim == 1

    # Mixed dtype (GH-32423)
    mixed_type_frame.apply(_assert_raw, axis=axis, raw=True)


def test_apply_axis1(float_frame):
    d = float_frame.index[0]
    result = float_frame.apply(np.mean, axis=1)[d]
    expected = np.mean(float_frame.xs(d))
    assert result == expected


def test_apply_mixed_dtype_corner():
    df = DataFrame({"A": ["foo"], "B": [1.0]})
    result = df[:0].apply(np.mean, axis=1)
    # the result here is actually kind of ambiguous, should it be a Series
    # or a DataFrame?
    expected = Series(np.nan, index=pd.Index([], dtype="int64"))
    tm.assert_series_equal(result, expected)


def test_apply_mixed_dtype_corner_indexing():
    df = DataFrame({"A": ["foo"], "B": [1.0]})

    result = df.apply(lambda x: x["A"], axis=1)
    expected = Series(["foo"], index=[0])
    tm.assert_series_equal(result, expected)

    result = df.apply(lambda x: x["B"], axis=1)
    expected = Series([1.0], index=[0])
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("ax", ["index", "columns"])
@pytest.mark.parametrize(
    "func", [lambda x: x, lambda x: x.mean()], ids=["identity", "mean"]
)
@pytest.mark.parametrize("raw", [True, False])
@pytest.mark.parametrize("axis", [0, 1])
def test_apply_empty_infer_type(ax, func, raw, axis):
    df = DataFrame(**{ax: ["a", "b", "c"]})

    with np.errstate(all="ignore"):
        with warnings.catch_warnings(record=True):
            warnings.simplefilter("ignore", RuntimeWarning)
            test_res = func(np.array([], dtype="f8"))
        is_reduction = not isinstance(test_res, np.ndarray)

        result = df.apply(func, axis=axis, raw=raw)
        if is_reduction:
            agg_axis = df._get_agg_axis(axis)
            assert isinstance(result, Series)
            assert result.index is agg_axis
        else:
            assert isinstance(result, DataFrame)


def test_apply_empty_infer_type_broadcast():
    no_cols = DataFrame(index=["a", "b", "c"])
    result = no_cols.apply(lambda x: x.mean(), result_type="broadcast")
    assert isinstance(result, DataFrame)


def test_apply_with_args_kwds_add_some(float_frame):
    def add_some(x, howmuch=0):
        return x + howmuch

    result = float_frame.apply(add_some, howmuch=2)
    expected = float_frame.apply(lambda x: x + 2)
    tm.assert_frame_equal(result, expected)


def test_apply_with_args_kwds_agg_and_add(float_frame):
    def agg_and_add(x, howmuch=0):
        return x.mean() + howmuch

    result = float_frame.apply(agg_and_add, howmuch=2)
    expected = float_frame.apply(lambda x: x.mean() + 2)
    tm.assert_series_equal(result, expected)


def test_apply_with_args_kwds_subtract_and_divide(float_frame):
    def subtract_and_divide(x, sub, divide=1):
        return (x - sub) / divide

    result = float_frame.apply(subtract_and_divide, args=(2,), divide=2)
    expected = float_frame.apply(lambda x: (x - 2.0) / 2.0)
    tm.assert_frame_equal(result, expected)


def test_apply_yield_list(float_frame):
    result = float_frame.apply(list)
    tm.assert_frame_equal(result, float_frame)


def test_apply_reduce_Series(float_frame):
    float_frame.iloc[::2, float_frame.columns.get_loc("A")] = np.nan
    expected = float_frame.mean(1)
    result = float_frame.apply(np.mean, axis=1)
    tm.assert_series_equal(result, expected)


def test_apply_reduce_to_dict():
    # GH 25196 37544
    data = DataFrame([[1, 2], [3, 4]], columns=["c0", "c1"], index=["i0", "i1"])

    result = data.apply(dict, axis=0)
    expected = Series([{"i0": 1, "i1": 3}, {"i0": 2, "i1": 4}], index=data.columns)
    tm.assert_series_equal(result, expected)

    result = data.apply(dict, axis=1)
    expected = Series([{"c0": 1, "c1": 2}, {"c0": 3, "c1": 4}], index=data.index)
    tm.assert_series_equal(result, expected)


def test_apply_differently_indexed():
    df = DataFrame(np.random.randn(20, 10))

    result = df.apply(Series.describe, axis=0)
    expected = DataFrame({i: v.describe() for i, v in df.items()}, columns=df.columns)
    tm.assert_frame_equal(result, expected)

    result = df.apply(Series.describe, axis=1)
    expected = DataFrame(
        {i: v.describe() for i, v in df.T.items()}, columns=df.index
    ).T
    tm.assert_frame_equal(result, expected)


def test_apply_bug():
    # GH 6125
    positions = DataFrame(
        [
            [1, "ABC0", 50],
            [1, "YUM0", 20],
            [1, "DEF0", 20],
            [2, "ABC1", 50],
            [2, "YUM1", 20],
            [2, "DEF1", 20],
        ],
        columns=["a", "market", "position"],
    )

    def f(r):
        return r["market"]

    expected = positions.apply(f, axis=1)

    positions = DataFrame(
        [
            [datetime(2013, 1, 1), "ABC0", 50],
            [datetime(2013, 1, 2), "YUM0", 20],
            [datetime(2013, 1, 3), "DEF0", 20],
            [datetime(2013, 1, 4), "ABC1", 50],
            [datetime(2013, 1, 5), "YUM1", 20],
            [datetime(2013, 1, 6), "DEF1", 20],
        ],
        columns=["a", "market", "position"],
    )
    result = positions.apply(f, axis=1)
    tm.assert_series_equal(result, expected)


def test_apply_convert_objects():
    expected = DataFrame(
        {
            "A": [
                "foo",
                "foo",
                "foo",
                "foo",
                "bar",
                "bar",
                "bar",
                "bar",
                "foo",
                "foo",
                "foo",
            ],
            "B": [
                "one",
                "one",
                "one",
                "two",
                "one",
                "one",
                "one",
                "two",
                "two",
                "two",
                "one",
            ],
            "C": [
                "dull",
                "dull",
                "shiny",
                "dull",
                "dull",
                "shiny",
                "shiny",
                "dull",
                "shiny",
                "shiny",
                "shiny",
            ],
            "D": np.random.randn(11),
            "E": np.random.randn(11),
            "F": np.random.randn(11),
        }
    )
    result = expected.apply(lambda x: x, axis=1)
    tm.assert_frame_equal(result, expected)


def test_apply_attach_name(float_frame):
    result = float_frame.apply(lambda x: x.name)
    expected = Series(float_frame.columns, index=float_frame.columns)
    tm.assert_series_equal(result, expected)


def test_apply_attach_name_axis1(float_frame):
    result = float_frame.apply(lambda x: x.name, axis=1)
    expected = Series(float_frame.index, index=float_frame.index)
    tm.assert_series_equal(result, expected)


def test_apply_attach_name_non_reduction(float_frame):
    # non-reductions
    result = float_frame.apply(lambda x: np.repeat(x.name, len(x)))
    expected = DataFrame(
        np.tile(float_frame.columns, (len(float_frame.index), 1)),
        index=float_frame.index,
        columns=float_frame.columns,
    )
    tm.assert_frame_equal(result, expected)


def test_apply_attach_name_non_reduction_axis1(float_frame):
    result = float_frame.apply(lambda x: np.repeat(x.name, len(x)), axis=1)
    expected = Series(
        np.repeat(t[0], len(float_frame.columns)) for t in float_frame.itertuples()
    )
    expected.index = float_frame.index
    tm.assert_series_equal(result, expected)


def test_apply_multi_index():
    index = MultiIndex.from_arrays([["a", "a", "b"], ["c", "d", "d"]])
    s = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["col1", "col2"])
    result = s.apply(lambda x: Series({"min": min(x), "max": max(x)}), 1)
    expected = DataFrame([[1, 2], [3, 4], [5, 6]], index=index, columns=["min", "max"])
    tm.assert_frame_equal(result, expected, check_like=True)


@pytest.mark.parametrize(
    "df, dicts",
    [
        [
            DataFrame([["foo", "bar"], ["spam", "eggs"]]),
            Series([{0: "foo", 1: "spam"}, {0: "bar", 1: "eggs"}]),
        ],
        [DataFrame([[0, 1], [2, 3]]), Series([{0: 0, 1: 2}, {0: 1, 1: 3}])],
    ],
)
def test_apply_dict(df, dicts):
    # GH 8735
    fn = lambda x: x.to_dict()
    reduce_true = df.apply(fn, result_type="reduce")
    reduce_false = df.apply(fn, result_type="expand")
    reduce_none = df.apply(fn)

    tm.assert_series_equal(reduce_true, dicts)
    tm.assert_frame_equal(reduce_false, df)
    tm.assert_series_equal(reduce_none, dicts)
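

# applymap applies a function elementwise (one scalar at a time), unlike
# apply, which passes whole rows or columns to the function.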


def test_applymap(float_frame):
    applied = float_frame.applymap(lambda x: x * 2)
    tm.assert_frame_equal(applied, float_frame * 2)
    float_frame.applymap(type)

    # GH 465: function returning tuples
    result = float_frame.applymap(lambda x: (x, x))["A"][0]
    assert isinstance(result, tuple)


@pytest.mark.parametrize("val", [1, 1.0])
def test_applymap_float_object_conversion(val):
    # GH 2909: object conversion to float in constructor?
    df = DataFrame(data=[val, "a"])
    result = df.applymap(lambda x: x).dtypes[0]
    assert result == object


def test_applymap_str():
    # GH 2786
    df = DataFrame(np.random.random((3, 4)))

    df2 = df.copy()
    cols = ["a", "a", "a", "a"]
    df.columns = cols

    expected = df2.applymap(str)
    expected.columns = cols
    result = df.applymap(str)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "col, val",
    [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]],
)
def test_applymap_datetimelike(col, val):
    # datetime/timedelta
    df = DataFrame(np.random.random((3, 4)))
    df[col] = val
    result = df.applymap(str)
    assert result.loc[0, col] == str(df.loc[0, col])


@pytest.mark.parametrize(
    "expected",
    [
        DataFrame(),
        DataFrame(columns=list("ABC")),
        DataFrame(index=list("ABC")),
        DataFrame({"A": [], "B": [], "C": []}),
    ],
)
@pytest.mark.parametrize("func", [round, lambda x: x])
def test_applymap_empty(expected, func):
    # GH 8222
    result = expected.applymap(func)
    tm.assert_frame_equal(result, expected)


def test_applymap_kwargs():
    # GH 40652
    result = DataFrame([[1, 2], [3, 4]]).applymap(lambda x, y: x + y, y=2)
    expected = DataFrame([[3, 4], [5, 6]])
    tm.assert_frame_equal(result, expected)


def test_applymap_na_ignore(float_frame):
    # GH 23803
    strlen_frame = float_frame.applymap(lambda x: len(str(x)))
    float_frame_with_na = float_frame.copy()
    mask = np.random.randint(0, 2, size=float_frame.shape, dtype=bool)
    float_frame_with_na[mask] = pd.NA
    strlen_frame_na_ignore = float_frame_with_na.applymap(
        lambda x: len(str(x)), na_action="ignore"
    )
    strlen_frame_with_na = strlen_frame.copy()
    strlen_frame_with_na[mask] = pd.NA
    tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)


def test_applymap_box_timestamps():
    # GH 2689, GH 2627
    ser = Series(date_range("1/1/2000", periods=10))

    def func(x):
        return (x.hour, x.day, x.month)

    # it works!
    DataFrame(ser).applymap(func)


def test_applymap_box():
    # ufunc will not be boxed. Same test cases as the test_map_box
    df = DataFrame(
        {
            "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")],
            "b": [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
            ],
            "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")],
            "d": [
                pd.Period("2011-01-01", freq="M"),
                pd.Period("2011-01-02", freq="M"),
            ],
        }
    )

    result = df.applymap(lambda x: type(x).__name__)
    expected = DataFrame(
        {
            "a": ["Timestamp", "Timestamp"],
            "b": ["Timestamp", "Timestamp"],
            "c": ["Timedelta", "Timedelta"],
            "d": ["Period", "Period"],
        }
    )
    tm.assert_frame_equal(result, expected)


def test_frame_apply_dont_convert_datetime64():
    from pandas.tseries.offsets import BDay

    df = DataFrame({"x1": [datetime(1996, 1, 1)]})

    df = df.applymap(lambda x: x + BDay())
    df = df.applymap(lambda x: x + BDay())

    result = df.x1.dtype
    assert result == "M8[ns]"


def test_apply_non_numpy_dtype():
    # GH 12244
    df = DataFrame({"dt": date_range("2015-01-01", periods=3, tz="Europe/Brussels")})
    result = df.apply(lambda x: x)
    tm.assert_frame_equal(result, df)

    result = df.apply(lambda x: x + pd.Timedelta("1day"))
    expected = DataFrame(
        {"dt": date_range("2015-01-02", periods=3, tz="Europe/Brussels")}
    )
    tm.assert_frame_equal(result, expected)


def test_apply_non_numpy_dtype_category():
    df = DataFrame({"dt": ["a", "b", "c", "a"]}, dtype="category")
    result = df.apply(lambda x: x)
    tm.assert_frame_equal(result, df)


def test_apply_dup_names_multi_agg():
    # GH 21063
    df = DataFrame([[0, 1], [2, 3]], columns=["a", "a"])
    expected = DataFrame([[0, 1]], columns=["a", "a"], index=["min"])
    result = df.agg(["min"])
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("op", ["apply", "agg"])
def test_apply_nested_result_axis_1(op):
    # GH 13820
    def apply_list(row):
        return [2 * row["A"], 2 * row["C"], 2 * row["B"]]

    df = DataFrame(np.zeros((4, 4)), columns=list("ABCD"))
    result = getattr(df, op)(apply_list, axis=1)
    expected = Series(
        [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
    )
    tm.assert_series_equal(result, expected)


def test_apply_noreduction_tzaware_object():
    # https://github.com/pandas-dev/pandas/issues/31505
    expected = DataFrame(
        {"foo": [Timestamp("2020", tz="UTC")]}, dtype="datetime64[ns, UTC]"
    )
    result = expected.apply(lambda x: x)
    tm.assert_frame_equal(result, expected)
    result = expected.apply(lambda x: x.copy())
    tm.assert_frame_equal(result, expected)


def test_apply_function_runs_once():
    # https://github.com/pandas-dev/pandas/issues/30815
    df = DataFrame({"a": [1, 2, 3]})
    names = []  # Save row names function is applied to

    def reducing_function(row):
        names.append(row.name)

    def non_reducing_function(row):
        names.append(row.name)
        return row

    for func in [reducing_function, non_reducing_function]:
        del names[:]
        df.apply(func, axis=1)
        assert names == list(df.index)


def test_apply_raw_function_runs_once():
    # https://github.com/pandas-dev/pandas/issues/34506
    df = DataFrame({"a": [1, 2, 3]})
    values = []  # Save row values function is applied to

    def reducing_function(row):
        values.extend(row)

    def non_reducing_function(row):
        values.extend(row)
        return row

    for func in [reducing_function, non_reducing_function]:
        del values[:]
        df.apply(func, raw=True, axis=1)
        assert values == list(df.a.to_list())


def test_applymap_function_runs_once():
    df = DataFrame({"a": [1, 2, 3]})
    values = []  # Save values function is applied to

    def reducing_function(val):
        values.append(val)

    def non_reducing_function(val):
        values.append(val)
        return val

    for func in [reducing_function, non_reducing_function]:
        del values[:]
        df.applymap(func)
        assert values == df.a.to_list()


def test_apply_with_byte_string():
    # GH 34529
    df = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"])
    expected = DataFrame(np.array([b"abcd", b"efgh"]), columns=["col"], dtype=object)
    # After we make the apply we expect a dataframe just
    # like the original but with the object datatype
    result = df.apply(lambda x: x.astype("object"))
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("val", ["asd", 12, None, np.NaN])
def test_apply_category_equalness(val):
    # Check categorical comparisons in apply, GH 21239
    df_values = ["asd", None, 12, "asd", "cde", np.NaN]
    df = DataFrame({"a": df_values}, dtype="category")

    result = df.a.apply(lambda x: x == val)
    expected = Series(
        [np.NaN if pd.isnull(x) else x == val for x in df_values], name="a"
    )
    tm.assert_series_equal(result, expected)


# the user has supplied an opaque UDF where
# they are transforming the input that requires
# us to infer the output


def test_infer_row_shape():
    # GH 17437
    # if row shape is changing, infer it
    df = DataFrame(np.random.rand(10, 2))

    result = df.apply(np.fft.fft, axis=0).shape
    assert result == (10, 2)

    result = df.apply(np.fft.rfft, axis=0).shape
    assert result == (6, 2)


def test_with_dictlike_columns():
    # GH 17602
    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1)
    expected = Series([{"s": 3} for t in df.itertuples()])
    tm.assert_series_equal(result, expected)

    df["tm"] = [
        Timestamp("2017-05-01 00:00:00"),
        Timestamp("2017-05-02 00:00:00"),
    ]
    result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1)
    tm.assert_series_equal(result, expected)

    # compose a series
    result = (df["a"] + df["b"]).apply(lambda x: {"s": x})
    expected = Series([{"s": 3}, {"s": 3}])
    tm.assert_series_equal(result, expected)


def test_with_dictlike_columns_with_datetime():
    # GH 18775
    df = DataFrame()
    df["author"] = ["X", "Y", "Z"]
    df["publisher"] = ["BBC", "NBC", "N24"]
    df["date"] = pd.to_datetime(
        ["17-10-2010 07:15:30", "13-05-2011 08:20:35", "15-01-2013 09:09:09"],
        dayfirst=True,
    )
    result = df.apply(lambda x: {}, axis=1)
    expected = Series([{}, {}, {}])
    tm.assert_series_equal(result, expected)


def test_with_dictlike_columns_with_infer():
    # GH 17602
    df = DataFrame([[1, 2], [1, 2]], columns=["a", "b"])
    result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand")
    expected = DataFrame({"s": [3, 3]})
    tm.assert_frame_equal(result, expected)

    df["tm"] = [
        Timestamp("2017-05-01 00:00:00"),
        Timestamp("2017-05-02 00:00:00"),
    ]
    result = df.apply(lambda x: {"s": x["a"] + x["b"]}, axis=1, result_type="expand")
    tm.assert_frame_equal(result, expected)


def test_with_listlike_columns():
    # GH 17348
    df = DataFrame(
        {
            "a": Series(np.random.randn(4)),
            "b": ["a", "list", "of", "words"],
            "ts": date_range("2016-10-01", periods=4, freq="H"),
        }
    )

    result = df[["a", "b"]].apply(tuple, axis=1)
    expected = Series([t[1:] for t in df[["a", "b"]].itertuples()])
    tm.assert_series_equal(result, expected)

    result = df[["a", "ts"]].apply(tuple, axis=1)
    expected = Series([t[1:] for t in df[["a", "ts"]].itertuples()])
    tm.assert_series_equal(result, expected)


def test_with_listlike_columns_returning_list():
    # GH 18919
    df = DataFrame(
        {"x": Series([["a", "b"], ["q"]]), "y": Series([["z"], ["q", "t"]])}
    )
    df.index = MultiIndex.from_tuples([("i0", "j0"), ("i1", "j1")])

    result = df.apply(lambda row: [el for el in row["x"] if el in row["y"]], axis=1)
    expected = Series([[], ["q"]], index=df.index)
    tm.assert_series_equal(result, expected)


def test_infer_output_shape_columns():
    # GH 18573
    df = DataFrame(
        {
            "number": [1.0, 2.0],
            "string": ["foo", "bar"],
            "datetime": [
                Timestamp("2017-11-29 03:30:00"),
                Timestamp("2017-11-29 03:45:00"),
            ],
        }
    )
    result = df.apply(lambda row: (row.number, row.string), axis=1)
    expected = Series([(t.number, t.string) for t in df.itertuples()])
    tm.assert_series_equal(result, expected)


def test_infer_output_shape_listlike_columns():
    # GH 16353
    df = DataFrame(np.random.randn(6, 3), columns=["A", "B", "C"])

    result = df.apply(lambda x: [1, 2, 3], axis=1)
    expected = Series([[1, 2, 3] for t in df.itertuples()])
    tm.assert_series_equal(result, expected)

    result = df.apply(lambda x: [1, 2], axis=1)
    expected = Series([[1, 2] for t in df.itertuples()])
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("val", [1, 2])
def test_infer_output_shape_listlike_columns_np_func(val):
    # GH 17970
    df = DataFrame({"a": [1, 2, 3]}, index=list("abc"))

    result = df.apply(lambda row: np.ones(val), axis=1)
    expected = Series([np.ones(val) for t in df.itertuples()], index=df.index)
    tm.assert_series_equal(result, expected)


def test_infer_output_shape_listlike_columns_with_timestamp():
    # GH 17892
    df = DataFrame(
        {
            "a": [
                Timestamp("2010-02-01"),
                Timestamp("2010-02-04"),
                Timestamp("2010-02-05"),
                Timestamp("2010-02-06"),
            ],
            "b": [9, 5, 4, 3],
            "c": [5, 3, 4, 2],
            "d": [1, 2, 3, 4],
        }
    )

    def fun(x):
        return (1, 2)

    result = df.apply(fun, axis=1)
    expected = Series([(1, 2) for t in df.itertuples()])
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("lst", [[1, 2, 3], [1, 2]])
def test_consistent_coerce_for_shapes(lst):
    # we want column names to NOT be propagated
    # just because the shape matches the input shape
    df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"])

    result = df.apply(lambda x: lst, axis=1)
    expected = Series([lst for t in df.itertuples()])
    tm.assert_series_equal(result, expected)


def test_consistent_names(int_frame_const_col):
    # if a Series is returned, we should use the resulting index names
    df = int_frame_const_col

    result = df.apply(
        lambda x: Series([1, 2, 3], index=["test", "other", "cols"]), axis=1
    )
    expected = int_frame_const_col.rename(
        columns={"A": "test", "B": "other", "C": "cols"}
    )
    tm.assert_frame_equal(result, expected)

    result = df.apply(lambda x: Series([1, 2], index=["test", "other"]), axis=1)
    expected = expected[["test", "other"]]
    tm.assert_frame_equal(result, expected)
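

# The test_result_type* tests check that result_type="expand" turns list-like
# return values into columns, while result_type="broadcast" keeps the frame's
# original shape and labels regardless of the labels the function returns.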


def test_result_type(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col

    result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="expand")
    expected = df.copy()
    expected.columns = [0, 1, 2]
    tm.assert_frame_equal(result, expected)


def test_result_type_shorter_list(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col

    result = df.apply(lambda x: [1, 2], axis=1, result_type="expand")
    expected = df[["A", "B"]].copy()
    expected.columns = [0, 1]
    tm.assert_frame_equal(result, expected)


def test_result_type_broadcast(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col

    # broadcast result
    result = df.apply(lambda x: [1, 2, 3], axis=1, result_type="broadcast")
    expected = df.copy()
    tm.assert_frame_equal(result, expected)


def test_result_type_broadcast_series_func(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col
    columns = ["other", "col", "names"]
    result = df.apply(
        lambda x: Series([1, 2, 3], index=columns), axis=1, result_type="broadcast"
    )
    expected = df.copy()
    tm.assert_frame_equal(result, expected)


def test_result_type_series_result(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col

    # series result
    result = df.apply(lambda x: Series([1, 2, 3], index=x.index), axis=1)
    expected = df.copy()
    tm.assert_frame_equal(result, expected)


def test_result_type_series_result_other_index(int_frame_const_col):
    # result_type should be consistent no matter which
    # path we take in the code
    df = int_frame_const_col

    # series result with other index
    columns = ["other", "col", "names"]
    result = df.apply(lambda x: Series([1, 2, 3], index=columns), axis=1)
    expected = df.copy()
    expected.columns = columns
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "box",
    [lambda x: list(x), lambda x: tuple(x), lambda x: np.array(x, dtype="int64")],
    ids=["list", "tuple", "array"],
)
def test_consistency_for_boxed(box, int_frame_const_col):
    # passing an array or list should not affect the output shape
    df = int_frame_const_col

    result = df.apply(lambda x: box([1, 2]), axis=1)
    expected = Series([box([1, 2]) for t in df.itertuples()])
    tm.assert_series_equal(result, expected)

    result = df.apply(lambda x: box([1, 2]), axis=1, result_type="expand")
    expected = int_frame_const_col[["A", "B"]].rename(columns={"A": 0, "B": 1})
    tm.assert_frame_equal(result, expected)
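

# The following tests cover DataFrame.agg: list-like and dict-like arguments,
# mixed dtypes, positional/keyword argument passing, and result ordering.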


def test_agg_transform(axis, float_frame):
    other_axis = 1 if axis in {0, "index"} else 0

    with np.errstate(all="ignore"):
        f_abs = np.abs(float_frame)
        f_sqrt = np.sqrt(float_frame)

        # ufunc
        expected = f_sqrt.copy()
        result = float_frame.apply(np.sqrt, axis=axis)
        tm.assert_frame_equal(result, expected)

        # list-like
        result = float_frame.apply([np.sqrt], axis=axis)
        expected = f_sqrt.copy()
        if axis in {0, "index"}:
            expected.columns = MultiIndex.from_product(
                [float_frame.columns, ["sqrt"]]
            )
        else:
            expected.index = MultiIndex.from_product([float_frame.index, ["sqrt"]])
        tm.assert_frame_equal(result, expected)

        # multiple items in list
        # these are in the order as if we are applying both
        # functions per series and then concatting
        result = float_frame.apply([np.abs, np.sqrt], axis=axis)
        expected = zip_frames([f_abs, f_sqrt], axis=other_axis)
        if axis in {0, "index"}:
            expected.columns = MultiIndex.from_product(
                [float_frame.columns, ["absolute", "sqrt"]]
            )
        else:
            expected.index = MultiIndex.from_product(
                [float_frame.index, ["absolute", "sqrt"]]
            )
        tm.assert_frame_equal(result, expected)


def test_demo():
    # demonstration tests
    df = DataFrame({"A": range(5), "B": 5})

    result = df.agg(["min", "max"])
    expected = DataFrame(
        {"A": [0, 4], "B": [5, 5]}, columns=["A", "B"], index=["min", "max"]
    )
    tm.assert_frame_equal(result, expected)


def test_demo_dict_agg():
    # demonstration tests
    df = DataFrame({"A": range(5), "B": 5})

    result = df.agg({"A": ["min", "max"], "B": ["sum", "max"]})
    expected = DataFrame(
        {"A": [4.0, 0.0, np.nan], "B": [5.0, np.nan, 25.0]},
        columns=["A", "B"],
        index=["max", "min", "sum"],
    )
    tm.assert_frame_equal(result.reindex_like(expected), expected)


def test_agg_with_name_as_column_name():
    # GH 36212 - Column name is "name"
    data = {"name": ["foo", "bar"]}
    df = DataFrame(data)

    # result's name should be None
    result = df.agg({"name": "count"})
    expected = Series({"name": 2})
    tm.assert_series_equal(result, expected)

    # Check if name is still preserved when aggregating series instead
    result = df["name"].agg({"name": "count"})
    expected = Series({"name": 2}, name="name")
    tm.assert_series_equal(result, expected)


def test_agg_multiple_mixed():
    # GH 20909
    mdf = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [1.0, 2.0, 3.0],
            "C": ["foo", "bar", "baz"],
        }
    )
    expected = DataFrame(
        {
            "A": [1, 6],
            "B": [1.0, 6.0],
            "C": ["bar", "foobarbaz"],
        },
        index=["min", "sum"],
    )
    # sorted index
    result = mdf.agg(["min", "sum"])
    tm.assert_frame_equal(result, expected)

    result = mdf[["C", "B", "A"]].agg(["sum", "min"])
    # GH40420: the result of .agg should have an index that is sorted
    # according to the arguments provided to agg.
    expected = expected[["C", "B", "A"]].reindex(["sum", "min"])
    tm.assert_frame_equal(result, expected)


def test_agg_multiple_mixed_raises():
    # GH 20909
    mdf = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [1.0, 2.0, 3.0],
            "C": ["foo", "bar", "baz"],
            "D": date_range("20130101", periods=3),
        }
    )
    # sorted index
    msg = "does not support reduction"
    with pytest.raises(TypeError, match=msg):
        mdf.agg(["min", "sum"])

    with pytest.raises(TypeError, match=msg):
        mdf[["D", "C", "B", "A"]].agg(["sum", "min"])


def test_agg_reduce(axis, float_frame):
    other_axis = 1 if axis in {0, "index"} else 0
    name1, name2 = float_frame.axes[other_axis].unique()[:2].sort_values()

    # all reducers
    expected = pd.concat(
        [
            float_frame.mean(axis=axis),
            float_frame.max(axis=axis),
            float_frame.sum(axis=axis),
        ],
        axis=1,
    )
    expected.columns = ["mean", "max", "sum"]
    expected = expected.T if axis in {0, "index"} else expected

    result = float_frame.agg(["mean", "max", "sum"], axis=axis)
    tm.assert_frame_equal(result, expected)

    # dict input with scalars
    func = {name1: "mean", name2: "sum"}
    result = float_frame.agg(func, axis=axis)
    expected = Series(
        [
            float_frame.loc(other_axis)[name1].mean(),
            float_frame.loc(other_axis)[name2].sum(),
        ],
        index=[name1, name2],
    )
    tm.assert_series_equal(result, expected)

    # dict input with lists
    func = {name1: ["mean"], name2: ["sum"]}
    result = float_frame.agg(func, axis=axis)
    expected = DataFrame(
        {
            name1: Series(
                [float_frame.loc(other_axis)[name1].mean()], index=["mean"]
            ),
            name2: Series([float_frame.loc(other_axis)[name2].sum()], index=["sum"]),
        }
    )
    expected = expected.T if axis in {1, "columns"} else expected
    tm.assert_frame_equal(result, expected)

    # dict input with lists with multiple
    func = {name1: ["mean", "sum"], name2: ["sum", "max"]}
    result = float_frame.agg(func, axis=axis)
    expected = pd.concat(
        {
            name1: Series(
                [
                    float_frame.loc(other_axis)[name1].mean(),
                    float_frame.loc(other_axis)[name1].sum(),
                ],
                index=["mean", "sum"],
            ),
            name2: Series(
                [
                    float_frame.loc(other_axis)[name2].sum(),
                    float_frame.loc(other_axis)[name2].max(),
                ],
                index=["sum", "max"],
            ),
        },
        axis=1,
    )
    expected = expected.T if axis in {1, "columns"} else expected
    tm.assert_frame_equal(result, expected)


def test_nuisance_columns():
    # GH 15015
    df = DataFrame(
        {
            "A": [1, 2, 3],
            "B": [1.0, 2.0, 3.0],
            "C": ["foo", "bar", "baz"],
            "D": date_range("20130101", periods=3),
        }
    )

    result = df.agg("min")
    expected = Series([1, 1.0, "bar", Timestamp("20130101")], index=df.columns)
    tm.assert_series_equal(result, expected)

    result = df.agg(["min"])
    expected = DataFrame(
        [[1, 1.0, "bar", Timestamp("20130101")]],
        index=["min"],
        columns=df.columns,
    )
    tm.assert_frame_equal(result, expected)

    msg = "does not support reduction"
    with pytest.raises(TypeError, match=msg):
        df.agg("sum")

    result = df[["A", "B", "C"]].agg("sum")
    expected = Series([6, 6.0, "foobarbaz"], index=["A", "B", "C"])
    tm.assert_series_equal(result, expected)

    msg = "does not support reduction"
    with pytest.raises(TypeError, match=msg):
        df.agg(["sum"])
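

# String arguments such as "count" or "size" are dispatched to the
# corresponding DataFrame method or attribute rather than called as functions.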


@pytest.mark.parametrize("how", ["agg", "apply"])
def test_non_callable_aggregates(how):
    # GH 16405
    # 'size' is a property of frame/series
    # validate that this is working
    # GH 39116 - expand to apply
    df = DataFrame(
        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
    )

    # Function aggregate
    result = getattr(df, how)({"A": "count"})
    expected = Series({"A": 2})
    tm.assert_series_equal(result, expected)

    # Non-function aggregate
    result = getattr(df, how)({"A": "size"})
    expected = Series({"A": 3})
    tm.assert_series_equal(result, expected)

    # Mix function and non-function aggs
    result1 = getattr(df, how)(["count", "size"])
    result2 = getattr(df, how)(
        {"A": ["count", "size"], "B": ["count", "size"], "C": ["count", "size"]}
    )
    expected = DataFrame(
        {
            "A": {"count": 2, "size": 3},
            "B": {"count": 2, "size": 3},
            "C": {"count": 2, "size": 3},
        }
    )
    tm.assert_frame_equal(result1, result2, check_like=True)
    tm.assert_frame_equal(result2, expected, check_like=True)

    # Just functional string arg is same as calling df.arg()
    result = getattr(df, how)("count")
    expected = df.count()
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("how", ["agg", "apply"])
def test_size_as_str(how, axis):
    # GH 39934
    df = DataFrame(
        {"A": [None, 2, 3], "B": [1.0, np.nan, 3.0], "C": ["foo", None, "bar"]}
    )
    # Just a string attribute arg same as calling df.arg
    # on the columns
    result = getattr(df, how)("size", axis=axis)
    if axis in (0, "index"):
        expected = Series(df.shape[0], index=df.columns)
    else:
        expected = Series(df.shape[1], index=df.index)
    tm.assert_series_equal(result, expected)


def test_agg_listlike_result():
    # GH-29587 user defined function returning list-likes
    df = DataFrame(
        {"A": [2, 2, 3], "B": [1.5, np.nan, 1.5], "C": ["foo", None, "bar"]}
    )

    def func(group_col):
        return list(group_col.dropna().unique())

    result = df.agg(func)
    expected = Series([[2, 3], [1.5], ["foo", "bar"]], index=["A", "B", "C"])
    tm.assert_series_equal(result, expected)

    result = df.agg([func])
    expected = expected.to_frame("func").T
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize("axis", [0, 1])
@pytest.mark.parametrize(
    "args, kwargs",
    [
        ((1, 2, 3), {}),
        ((8, 7, 15), {}),
        ((1, 2), {}),
        ((1,), {"b": 2}),
        ((), {"a": 1, "b": 2}),
        ((), {"a": 2, "b": 1}),
        ((), {"a": 1, "b": 2, "c": 3}),
    ],
)
def test_agg_args_kwargs(axis, args, kwargs):
    def f(x, a, b, c=3):
        return x.sum() + (a + b) / c

    df = DataFrame([[1, 2], [3, 4]])

    if axis == 0:
        expected = Series([5.0, 7.0])
    else:
        expected = Series([4.0, 8.0])

    result = df.agg(f, axis, *args, **kwargs)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("num_cols", [2, 3, 5])
def test_frequency_is_original(num_cols):
    # GH 22150
    index = pd.DatetimeIndex(["1950-06-30", "1952-10-24", "1953-05-29"])
    original = index.copy()
    df = DataFrame(1, index=index, columns=range(num_cols))
    df.apply(lambda x: x)
    assert index.freq == original.freq


def test_apply_datetime_tz_issue():
    # GH 29052
    timestamps = [
        Timestamp("2019-03-15 12:34:31.909000+0000", tz="UTC"),
        Timestamp("2019-03-15 12:34:34.359000+0000", tz="UTC"),
        Timestamp("2019-03-15 12:34:34.660000+0000", tz="UTC"),
    ]
    df = DataFrame(data=[0, 1, 2], index=timestamps)
    result = df.apply(lambda x: x.name, axis=1)
    expected = Series(index=timestamps, data=timestamps)

    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize("df", [DataFrame({"A": ["a", None], "B": ["c", "d"]})])
@pytest.mark.parametrize("method", ["min", "max", "sum"])
def test_mixed_column_raises(df, method):
    # GH 16832
    if method == "sum":
        msg = r'can only concatenate str \(not "int"\) to str'
    else:
        msg = "not supported between instances of 'str' and 'float'"
    with pytest.raises(TypeError, match=msg):
        getattr(df, method)()


@pytest.mark.parametrize("col", [1, 1.0, True, "a", np.nan])
def test_apply_dtype(col):
    # GH 31466
    df = DataFrame([[1.0, col]], columns=["a", "b"])
    result = df.apply(lambda x: x.dtype)
    expected = df.dtypes

    tm.assert_series_equal(result, expected)


def test_apply_mutating(using_array_manager, using_copy_on_write):
    # GH#35462 case where applied func pins a new BlockManager to a row
    df = DataFrame({"a": range(100), "b": range(100, 200)})
    df_orig = df.copy()

    def func(row):
        mgr = row._mgr
        row.loc["a"] += 1
        assert row._mgr is not mgr
        return row

    expected = df.copy()
    expected["a"] += 1

    result = df.apply(func, axis=1)

    tm.assert_frame_equal(result, expected)
    if using_copy_on_write or using_array_manager:
        # INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent
        # INFO(ArrayManager) With BlockManager, the row is a view and mutated in place,
        # with ArrayManager the row is not a view, and thus not mutated in place
        tm.assert_frame_equal(df, df_orig)
    else:
        tm.assert_frame_equal(df, result)


def test_apply_empty_list_reduce():
    # GH#35683 get columns correct
    df = DataFrame([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]], columns=["a", "b"])

    result = df.apply(lambda x: [], result_type="reduce")
    expected = Series({"a": [], "b": []}, dtype=object)
    tm.assert_series_equal(result, expected)


def test_apply_no_suffix_index():
    # GH36189
    pdf = DataFrame([[4, 9]] * 3, columns=["A", "B"])
    result = pdf.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
    expected = DataFrame(
        {"A": [12, 12, 12], "B": [27, 27, 27]}, index=["sum", "<lambda>", "<lambda>"]
    )

    tm.assert_frame_equal(result, expected)


def test_apply_raw_returns_string():
    # https://github.com/pandas-dev/pandas/issues/35940
    df = DataFrame({"A": ["aa", "bbb"]})
    result = df.apply(lambda x: x[0], axis=1, raw=True)
    expected = Series(["aa", "bbb"])
    tm.assert_series_equal(result, expected)


def test_aggregation_func_column_order():
    # GH40420: the result of .agg should have an index that is sorted
    # according to the arguments provided to agg.
    df = DataFrame(
        [
            (1, 0, 0),
            (2, 0, 0),
            (3, 0, 0),
            (4, 5, 4),
            (5, 6, 6),
            (6, 7, 7),
        ],
        columns=("att1", "att2", "att3"),
    )

    def sum_div2(s):
        return s.sum() / 2

    aggs = ["sum", sum_div2, "count", "min"]
    result = df.agg(aggs)
    expected = DataFrame(
        {
            "att1": [21.0, 10.5, 6.0, 1.0],
            "att2": [18.0, 9.0, 6.0, 0.0],
            "att3": [17.0, 8.5, 6.0, 0.0],
        },
        index=["sum", "sum_div2", "count", "min"],
    )
    tm.assert_frame_equal(result, expected)


def test_apply_getitem_axis_1():
    # GH 13427
    df = DataFrame({"a": [0, 1, 2], "b": [1, 2, 3]})
    result = df[["a", "a"]].apply(lambda x: x[0] + x[1], axis=1)
    expected = Series([0, 2, 4])
    tm.assert_series_equal(result, expected)


def test_nuisance_depr_passes_through_warnings():
    # GH 43740
    # DataFrame.agg with list-likes may emit warnings for both individual
    # args and for entire columns, but we only want to emit once. We
    # catch and suppress the warnings for individual args, but need to make
    # sure if some other warnings were raised, they get passed through to
    # the user.

    def expected_warning(x):
        warnings.warn("Hello, World!")
        return x.sum()

    df = DataFrame({"a": [1, 2, 3]})
    with tm.assert_produces_warning(UserWarning, match="Hello, World!"):
        df.agg([expected_warning])


def test_apply_type():
    # GH 46719
    df = DataFrame(
        {"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
        index=["a", "b", "c"],
    )

    # applymap
    result = df.applymap(type)
    expected = DataFrame(
        {"col1": [int, str, type], "col2": [float, datetime, float]},
        index=["a", "b", "c"],
    )
    tm.assert_frame_equal(result, expected)

    # axis=0
    result = df.apply(type, axis=0)
    expected = Series({"col1": Series, "col2": Series})
    tm.assert_series_equal(result, expected)

    # axis=1
    result = df.apply(type, axis=1)
    expected = Series({"a": Series, "b": Series, "c": Series})
    tm.assert_series_equal(result, expected)


def test_apply_on_empty_dataframe():
    # GH 39111
    df = DataFrame({"a": [1, 2], "b": [3, 0]})
    result = df.head(0).apply(lambda x: max(x["a"], x["b"]), axis=1)
    expected = Series([], dtype=np.float64)
    tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
    "test, constant",
    [
        ({"a": [1, 2, 3], "b": [1, 1, 1]}, {"a": [1, 2, 3], "b": [1]}),
        ({"a": [2, 2, 2], "b": [1, 1, 1]}, {"a": [2], "b": [1]}),
    ],
)
def test_unique_agg_type_is_series(test, constant):
    # GH#22558
    df1 = DataFrame(test)
    expected = Series(data=constant, index=["a", "b"], dtype="object")
    aggregation = {"a": "unique", "b": "unique"}

    result = df1.agg(aggregation)

    tm.assert_series_equal(result, expected)


def test_any_apply_keyword_non_zero_axis_regression():
    # https://github.com/pandas-dev/pandas/issues/48656
    df = DataFrame({"A": [1, 2, 0], "B": [0, 2, 0], "C": [0, 0, 0]})
    expected = Series([True, True, False])
    tm.assert_series_equal(df.any(axis=1), expected)

    result = df.apply("any", axis=1)
    tm.assert_series_equal(result, expected)

    result = df.apply("any", 1)
    tm.assert_series_equal(result, expected)


def test_agg_list_like_func_with_args():
    # GH 50624
    df = DataFrame({"x": [1, 2, 3]})

    def foo1(x, a=1, c=0):
        return x + a + c

    def foo2(x, b=2, c=0):
        return x + b + c

    msg = r"foo1\(\) got an unexpected keyword argument 'b'"
    with pytest.raises(TypeError, match=msg):
        df.agg([foo1, foo2], 0, 3, b=3, c=4)

    result = df.agg([foo1, foo2], 0, 3, c=4)
    expected = DataFrame(
        [[8, 8], [9, 9], [10, 10]],
        columns=MultiIndex.from_tuples([("x", "foo1"), ("x", "foo2")]),
    )
    tm.assert_frame_equal(result, expected)