# test_series_apply.py
  1. from collections import (
  2. Counter,
  3. defaultdict,
  4. )
  5. from decimal import Decimal
  6. import math
  7. import numpy as np
  8. import pytest
  9. import pandas as pd
  10. from pandas import (
  11. DataFrame,
  12. Index,
  13. MultiIndex,
  14. Series,
  15. concat,
  16. isna,
  17. timedelta_range,
  18. )
  19. import pandas._testing as tm
  20. from pandas.tests.apply.common import series_transform_kernels
  21. def test_series_map_box_timedelta():
  22. # GH#11349
  23. ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
  24. def f(x):
  25. return x.total_seconds()
  26. ser.map(f)
  27. ser.apply(f)
  28. DataFrame(ser).applymap(f)
  29. def test_apply(datetime_series):
  30. with np.errstate(all="ignore"):
  31. tm.assert_series_equal(datetime_series.apply(np.sqrt), np.sqrt(datetime_series))
  32. # element-wise apply
  33. tm.assert_series_equal(datetime_series.apply(math.exp), np.exp(datetime_series))
  34. # empty series
  35. s = Series(dtype=object, name="foo", index=Index([], name="bar"))
  36. rs = s.apply(lambda x: x)
  37. tm.assert_series_equal(s, rs)
  38. # check all metadata (GH 9322)
  39. assert s is not rs
  40. assert s.index is rs.index
  41. assert s.dtype == rs.dtype
  42. assert s.name == rs.name
  43. # index but no data
  44. s = Series(index=[1, 2, 3], dtype=np.float64)
  45. rs = s.apply(lambda x: x)
  46. tm.assert_series_equal(s, rs)
  47. def test_apply_same_length_inference_bug():
  48. s = Series([1, 2])
  49. def f(x):
  50. return (x, x + 1)
  51. result = s.apply(f)
  52. expected = s.map(f)
  53. tm.assert_series_equal(result, expected)
  54. s = Series([1, 2, 3])
  55. result = s.apply(f)
  56. expected = s.map(f)
  57. tm.assert_series_equal(result, expected)
  58. def test_apply_dont_convert_dtype():
  59. s = Series(np.random.randn(10))
  60. def f(x):
  61. return x if x > 0 else np.nan
  62. result = s.apply(f, convert_dtype=False)
  63. assert result.dtype == object
  64. def test_apply_args():
  65. s = Series(["foo,bar"])
  66. result = s.apply(str.split, args=(",",))
  67. assert result[0] == ["foo", "bar"]
  68. assert isinstance(result[0], list)
  69. @pytest.mark.parametrize(
  70. "args, kwargs, increment",
  71. [((), {}, 0), ((), {"a": 1}, 1), ((2, 3), {}, 32), ((1,), {"c": 2}, 201)],
  72. )
  73. def test_agg_args(args, kwargs, increment):
  74. # GH 43357
  75. def f(x, a=0, b=0, c=0):
  76. return x + a + 10 * b + 100 * c
  77. s = Series([1, 2])
  78. result = s.agg(f, 0, *args, **kwargs)
  79. expected = s + increment
  80. tm.assert_series_equal(result, expected)
  81. def test_agg_list_like_func_with_args():
  82. # GH 50624
  83. s = Series([1, 2, 3])
  84. def foo1(x, a=1, c=0):
  85. return x + a + c
  86. def foo2(x, b=2, c=0):
  87. return x + b + c
  88. msg = r"foo1\(\) got an unexpected keyword argument 'b'"
  89. with pytest.raises(TypeError, match=msg):
  90. s.agg([foo1, foo2], 0, 3, b=3, c=4)
  91. result = s.agg([foo1, foo2], 0, 3, c=4)
  92. expected = DataFrame({"foo1": [8, 9, 10], "foo2": [8, 9, 10]})
  93. tm.assert_frame_equal(result, expected)
  94. def test_series_map_box_timestamps():
  95. # GH#2689, GH#2627
  96. ser = Series(pd.date_range("1/1/2000", periods=10))
  97. def func(x):
  98. return (x.hour, x.day, x.month)
  99. # it works!
  100. ser.map(func)
  101. ser.apply(func)
  102. def test_series_map_stringdtype(any_string_dtype):
  103. # map test on StringDType, GH#40823
  104. ser1 = Series(
  105. data=["cat", "dog", "rabbit"],
  106. index=["id1", "id2", "id3"],
  107. dtype=any_string_dtype,
  108. )
  109. ser2 = Series(data=["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)
  110. result = ser2.map(ser1)
  111. expected = Series(data=["rabbit", "dog", "cat", pd.NA], dtype=any_string_dtype)
  112. tm.assert_series_equal(result, expected)
  113. def test_apply_box():
  114. # ufunc will not be boxed. Same test cases as the test_map_box
  115. vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
  116. s = Series(vals)
  117. assert s.dtype == "datetime64[ns]"
  118. # boxed value must be Timestamp instance
  119. res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
  120. exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
  121. tm.assert_series_equal(res, exp)
  122. vals = [
  123. pd.Timestamp("2011-01-01", tz="US/Eastern"),
  124. pd.Timestamp("2011-01-02", tz="US/Eastern"),
  125. ]
  126. s = Series(vals)
  127. assert s.dtype == "datetime64[ns, US/Eastern]"
  128. res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
  129. exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
  130. tm.assert_series_equal(res, exp)
  131. # timedelta
  132. vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
  133. s = Series(vals)
  134. assert s.dtype == "timedelta64[ns]"
  135. res = s.apply(lambda x: f"{type(x).__name__}_{x.days}")
  136. exp = Series(["Timedelta_1", "Timedelta_2"])
  137. tm.assert_series_equal(res, exp)
  138. # period
  139. vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
  140. s = Series(vals)
  141. assert s.dtype == "Period[M]"
  142. res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
  143. exp = Series(["Period_M", "Period_M"])
  144. tm.assert_series_equal(res, exp)
  145. def test_apply_datetimetz():
  146. values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
  147. "Asia/Tokyo"
  148. )
  149. s = Series(values, name="XX")
  150. result = s.apply(lambda x: x + pd.offsets.Day())
  151. exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
  152. "Asia/Tokyo"
  153. )
  154. exp = Series(exp_values, name="XX")
  155. tm.assert_series_equal(result, exp)
  156. result = s.apply(lambda x: x.hour)
  157. exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
  158. tm.assert_series_equal(result, exp)
  159. # not vectorized
  160. def f(x):
  161. if not isinstance(x, pd.Timestamp):
  162. raise ValueError
  163. return str(x.tz)
  164. result = s.map(f)
  165. exp = Series(["Asia/Tokyo"] * 25, name="XX")
  166. tm.assert_series_equal(result, exp)
  167. def test_apply_categorical():
  168. values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
  169. ser = Series(values, name="XX", index=list("abcdefg"))
  170. result = ser.apply(lambda x: x.lower())
  171. # should be categorical dtype when the number of categories are
  172. # the same
  173. values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
  174. exp = Series(values, name="XX", index=list("abcdefg"))
  175. tm.assert_series_equal(result, exp)
  176. tm.assert_categorical_equal(result.values, exp.values)
  177. result = ser.apply(lambda x: "A")
  178. exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
  179. tm.assert_series_equal(result, exp)
  180. assert result.dtype == object
  181. @pytest.mark.parametrize("series", [["1-1", "1-1", np.NaN], ["1-1", "1-2", np.NaN]])
  182. def test_apply_categorical_with_nan_values(series):
  183. # GH 20714 bug fixed in: GH 24275
  184. s = Series(series, dtype="category")
  185. result = s.apply(lambda x: x.split("-")[0])
  186. result = result.astype(object)
  187. expected = Series(["1", "1", np.NaN], dtype="category")
  188. expected = expected.astype(object)
  189. tm.assert_series_equal(result, expected)
  190. def test_apply_empty_integer_series_with_datetime_index():
  191. # GH 21245
  192. s = Series([], index=pd.date_range(start="2018-01-01", periods=0), dtype=int)
  193. result = s.apply(lambda x: x)
  194. tm.assert_series_equal(result, s)
  195. def test_transform(string_series):
  196. # transforming functions
  197. with np.errstate(all="ignore"):
  198. f_sqrt = np.sqrt(string_series)
  199. f_abs = np.abs(string_series)
  200. # ufunc
  201. result = string_series.apply(np.sqrt)
  202. expected = f_sqrt.copy()
  203. tm.assert_series_equal(result, expected)
  204. # list-like
  205. result = string_series.apply([np.sqrt])
  206. expected = f_sqrt.to_frame().copy()
  207. expected.columns = ["sqrt"]
  208. tm.assert_frame_equal(result, expected)
  209. result = string_series.apply(["sqrt"])
  210. tm.assert_frame_equal(result, expected)
  211. # multiple items in list
  212. # these are in the order as if we are applying both functions per
  213. # series and then concatting
  214. expected = concat([f_sqrt, f_abs], axis=1)
  215. expected.columns = ["sqrt", "absolute"]
  216. result = string_series.apply([np.sqrt, np.abs])
  217. tm.assert_frame_equal(result, expected)
  218. # dict, provide renaming
  219. expected = concat([f_sqrt, f_abs], axis=1)
  220. expected.columns = ["foo", "bar"]
  221. expected = expected.unstack().rename("series")
  222. result = string_series.apply({"foo": np.sqrt, "bar": np.abs})
  223. tm.assert_series_equal(result.reindex_like(expected), expected)
  224. @pytest.mark.parametrize("op", series_transform_kernels)
  225. def test_transform_partial_failure(op, request):
  226. # GH 35964
  227. if op in ("ffill", "bfill", "pad", "backfill", "shift"):
  228. request.node.add_marker(
  229. pytest.mark.xfail(reason=f"{op} is successful on any dtype")
  230. )
  231. # Using object makes most transform kernels fail
  232. ser = Series(3 * [object])
  233. if op in ("fillna", "ngroup"):
  234. error = ValueError
  235. msg = "Transform function failed"
  236. else:
  237. error = TypeError
  238. msg = "|".join(
  239. [
  240. "not supported between instances of 'type' and 'type'",
  241. "unsupported operand type",
  242. ]
  243. )
  244. with pytest.raises(error, match=msg):
  245. ser.transform([op, "shift"])
  246. with pytest.raises(error, match=msg):
  247. ser.transform({"A": op, "B": "shift"})
  248. with pytest.raises(error, match=msg):
  249. ser.transform({"A": [op], "B": ["shift"]})
  250. with pytest.raises(error, match=msg):
  251. ser.transform({"A": [op, "shift"], "B": [op]})
  252. def test_transform_partial_failure_valueerror():
  253. # GH 40211
  254. def noop(x):
  255. return x
  256. def raising_op(_):
  257. raise ValueError
  258. ser = Series(3 * [object])
  259. msg = "Transform function failed"
  260. with pytest.raises(ValueError, match=msg):
  261. ser.transform([noop, raising_op])
  262. with pytest.raises(ValueError, match=msg):
  263. ser.transform({"A": raising_op, "B": noop})
  264. with pytest.raises(ValueError, match=msg):
  265. ser.transform({"A": [raising_op], "B": [noop]})
  266. with pytest.raises(ValueError, match=msg):
  267. ser.transform({"A": [noop, raising_op], "B": [noop]})
  268. def test_demo():
  269. # demonstration tests
  270. s = Series(range(6), dtype="int64", name="series")
  271. result = s.agg(["min", "max"])
  272. expected = Series([0, 5], index=["min", "max"], name="series")
  273. tm.assert_series_equal(result, expected)
  274. result = s.agg({"foo": "min"})
  275. expected = Series([0], index=["foo"], name="series")
  276. tm.assert_series_equal(result, expected)
  277. def test_agg_apply_evaluate_lambdas_the_same(string_series):
  278. # test that we are evaluating row-by-row first
  279. # before vectorized evaluation
  280. result = string_series.apply(lambda x: str(x))
  281. expected = string_series.agg(lambda x: str(x))
  282. tm.assert_series_equal(result, expected)
  283. result = string_series.apply(str)
  284. expected = string_series.agg(str)
  285. tm.assert_series_equal(result, expected)
  286. def test_with_nested_series(datetime_series):
  287. # GH 2316
  288. # .agg with a reducer and a transform, what to do
  289. result = datetime_series.apply(lambda x: Series([x, x**2], index=["x", "x^2"]))
  290. expected = DataFrame({"x": datetime_series, "x^2": datetime_series**2})
  291. tm.assert_frame_equal(result, expected)
  292. result = datetime_series.agg(lambda x: Series([x, x**2], index=["x", "x^2"]))
  293. tm.assert_frame_equal(result, expected)
  294. def test_replicate_describe(string_series):
  295. # this also tests a result set that is all scalars
  296. expected = string_series.describe()
  297. result = string_series.apply(
  298. {
  299. "count": "count",
  300. "mean": "mean",
  301. "std": "std",
  302. "min": "min",
  303. "25%": lambda x: x.quantile(0.25),
  304. "50%": "median",
  305. "75%": lambda x: x.quantile(0.75),
  306. "max": "max",
  307. }
  308. )
  309. tm.assert_series_equal(result, expected)
  310. def test_reduce(string_series):
  311. # reductions with named functions
  312. result = string_series.agg(["sum", "mean"])
  313. expected = Series(
  314. [string_series.sum(), string_series.mean()],
  315. ["sum", "mean"],
  316. name=string_series.name,
  317. )
  318. tm.assert_series_equal(result, expected)
  319. @pytest.mark.parametrize("how", ["agg", "apply"])
  320. def test_non_callable_aggregates(how):
  321. # test agg using non-callable series attributes
  322. # GH 39116 - expand to apply
  323. s = Series([1, 2, None])
  324. # Calling agg w/ just a string arg same as calling s.arg
  325. result = getattr(s, how)("size")
  326. expected = s.size
  327. assert result == expected
  328. # test when mixed w/ callable reducers
  329. result = getattr(s, how)(["size", "count", "mean"])
  330. expected = Series({"size": 3.0, "count": 2.0, "mean": 1.5})
  331. tm.assert_series_equal(result, expected)
  332. def test_series_apply_no_suffix_index():
  333. # GH36189
  334. s = Series([4] * 3)
  335. result = s.apply(["sum", lambda x: x.sum(), lambda x: x.sum()])
  336. expected = Series([12, 12, 12], index=["sum", "<lambda>", "<lambda>"])
  337. tm.assert_series_equal(result, expected)
  338. def test_map(datetime_series):
  339. index, data = tm.getMixedTypeDict()
  340. source = Series(data["B"], index=data["C"])
  341. target = Series(data["C"][:4], index=data["D"][:4])
  342. merged = target.map(source)
  343. for k, v in merged.items():
  344. assert v == source[target[k]]
  345. # input could be a dict
  346. merged = target.map(source.to_dict())
  347. for k, v in merged.items():
  348. assert v == source[target[k]]
  349. # function
  350. result = datetime_series.map(lambda x: x * 2)
  351. tm.assert_series_equal(result, datetime_series * 2)
  352. # GH 10324
  353. a = Series([1, 2, 3, 4])
  354. b = Series(["even", "odd", "even", "odd"], dtype="category")
  355. c = Series(["even", "odd", "even", "odd"])
  356. exp = Series(["odd", "even", "odd", np.nan], dtype="category")
  357. tm.assert_series_equal(a.map(b), exp)
  358. exp = Series(["odd", "even", "odd", np.nan])
  359. tm.assert_series_equal(a.map(c), exp)
  360. a = Series(["a", "b", "c", "d"])
  361. b = Series([1, 2, 3, 4], index=pd.CategoricalIndex(["b", "c", "d", "e"]))
  362. c = Series([1, 2, 3, 4], index=Index(["b", "c", "d", "e"]))
  363. exp = Series([np.nan, 1, 2, 3])
  364. tm.assert_series_equal(a.map(b), exp)
  365. exp = Series([np.nan, 1, 2, 3])
  366. tm.assert_series_equal(a.map(c), exp)
  367. a = Series(["a", "b", "c", "d"])
  368. b = Series(
  369. ["B", "C", "D", "E"],
  370. dtype="category",
  371. index=pd.CategoricalIndex(["b", "c", "d", "e"]),
  372. )
  373. c = Series(["B", "C", "D", "E"], index=Index(["b", "c", "d", "e"]))
  374. exp = Series(
  375. pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
  376. )
  377. tm.assert_series_equal(a.map(b), exp)
  378. exp = Series([np.nan, "B", "C", "D"])
  379. tm.assert_series_equal(a.map(c), exp)
  380. def test_map_empty(request, index):
  381. if isinstance(index, MultiIndex):
  382. request.node.add_marker(
  383. pytest.mark.xfail(
  384. reason="Initializing a Series from a MultiIndex is not supported"
  385. )
  386. )
  387. s = Series(index)
  388. result = s.map({})
  389. expected = Series(np.nan, index=s.index)
  390. tm.assert_series_equal(result, expected)
  391. def test_map_compat():
  392. # related GH 8024
  393. s = Series([True, True, False], index=[1, 2, 3])
  394. result = s.map({True: "foo", False: "bar"})
  395. expected = Series(["foo", "foo", "bar"], index=[1, 2, 3])
  396. tm.assert_series_equal(result, expected)
  397. def test_map_int():
  398. left = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
  399. right = Series({1: 11, 2: 22, 3: 33})
  400. assert left.dtype == np.float_
  401. assert issubclass(right.dtype.type, np.integer)
  402. merged = left.map(right)
  403. assert merged.dtype == np.float_
  404. assert isna(merged["d"])
  405. assert not isna(merged["c"])
  406. def test_map_type_inference():
  407. s = Series(range(3))
  408. s2 = s.map(lambda x: np.where(x == 0, 0, 1))
  409. assert issubclass(s2.dtype.type, np.integer)
  410. def test_map_decimal(string_series):
  411. result = string_series.map(lambda x: Decimal(str(x)))
  412. assert result.dtype == np.object_
  413. assert isinstance(result[0], Decimal)
  414. def test_map_na_exclusion():
  415. s = Series([1.5, np.nan, 3, np.nan, 5])
  416. result = s.map(lambda x: x * 2, na_action="ignore")
  417. exp = s * 2
  418. tm.assert_series_equal(result, exp)
  419. def test_map_dict_with_tuple_keys():
  420. """
  421. Due to new MultiIndex-ing behaviour in v0.14.0,
  422. dicts with tuple keys passed to map were being
  423. converted to a multi-index, preventing tuple values
  424. from being mapped properly.
  425. """
  426. # GH 18496
  427. df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})
  428. label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
  429. df["labels"] = df["a"].map(label_mappings)
  430. df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
  431. # All labels should be filled now
  432. tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
  433. def test_map_counter():
  434. s = Series(["a", "b", "c"], index=[1, 2, 3])
  435. counter = Counter()
  436. counter["b"] = 5
  437. counter["c"] += 1
  438. result = s.map(counter)
  439. expected = Series([0, 5, 1], index=[1, 2, 3])
  440. tm.assert_series_equal(result, expected)
  441. def test_map_defaultdict():
  442. s = Series([1, 2, 3], index=["a", "b", "c"])
  443. default_dict = defaultdict(lambda: "blank")
  444. default_dict[1] = "stuff"
  445. result = s.map(default_dict)
  446. expected = Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
  447. tm.assert_series_equal(result, expected)
  448. def test_map_dict_na_key():
  449. # https://github.com/pandas-dev/pandas/issues/17648
  450. # Checks that np.nan key is appropriately mapped
  451. s = Series([1, 2, np.nan])
  452. expected = Series(["a", "b", "c"])
  453. result = s.map({1: "a", 2: "b", np.nan: "c"})
  454. tm.assert_series_equal(result, expected)
  455. @pytest.mark.parametrize("na_action", [None, "ignore"])
  456. def test_map_defaultdict_na_key(na_action):
  457. # GH 48813
  458. s = Series([1, 2, np.nan])
  459. default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
  460. result = s.map(default_map, na_action=na_action)
  461. expected = Series({0: "a", 1: "b", 2: "c" if na_action is None else np.nan})
  462. tm.assert_series_equal(result, expected)
  463. @pytest.mark.parametrize("na_action", [None, "ignore"])
  464. def test_map_defaultdict_missing_key(na_action):
  465. # GH 48813
  466. s = Series([1, 2, np.nan])
  467. default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"})
  468. result = s.map(default_map, na_action=na_action)
  469. expected = Series({0: "a", 1: "b", 2: "missing" if na_action is None else np.nan})
  470. tm.assert_series_equal(result, expected)
  471. @pytest.mark.parametrize("na_action", [None, "ignore"])
  472. def test_map_defaultdict_unmutated(na_action):
  473. # GH 48813
  474. s = Series([1, 2, np.nan])
  475. default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
  476. expected_default_map = default_map.copy()
  477. s.map(default_map, na_action=na_action)
  478. assert default_map == expected_default_map
  479. @pytest.mark.parametrize("arg_func", [dict, Series])
  480. def test_map_dict_ignore_na(arg_func):
  481. # GH#47527
  482. mapping = arg_func({1: 10, np.nan: 42})
  483. ser = Series([1, np.nan, 2])
  484. result = ser.map(mapping, na_action="ignore")
  485. expected = Series([10, np.nan, np.nan])
  486. tm.assert_series_equal(result, expected)
  487. def test_map_defaultdict_ignore_na():
  488. # GH#47527
  489. mapping = defaultdict(int, {1: 10, np.nan: 42})
  490. ser = Series([1, np.nan, 2])
  491. result = ser.map(mapping)
  492. expected = Series([10, 42, 0])
  493. tm.assert_series_equal(result, expected)
  494. def test_map_categorical_na_ignore():
  495. # GH#47527
  496. values = pd.Categorical([1, np.nan, 2], categories=[10, 1])
  497. ser = Series(values)
  498. result = ser.map({1: 10, np.nan: 42})
  499. expected = Series([10, np.nan, np.nan])
  500. tm.assert_series_equal(result, expected)
  501. def test_map_dict_subclass_with_missing():
  502. """
  503. Test Series.map with a dictionary subclass that defines __missing__,
  504. i.e. sets a default value (GH #15999).
  505. """
  506. class DictWithMissing(dict):
  507. def __missing__(self, key):
  508. return "missing"
  509. s = Series([1, 2, 3])
  510. dictionary = DictWithMissing({3: "three"})
  511. result = s.map(dictionary)
  512. expected = Series(["missing", "missing", "three"])
  513. tm.assert_series_equal(result, expected)
  514. def test_map_dict_subclass_without_missing():
  515. class DictWithoutMissing(dict):
  516. pass
  517. s = Series([1, 2, 3])
  518. dictionary = DictWithoutMissing({3: "three"})
  519. result = s.map(dictionary)
  520. expected = Series([np.nan, np.nan, "three"])
  521. tm.assert_series_equal(result, expected)
  522. def test_map_abc_mapping(non_dict_mapping_subclass):
  523. # https://github.com/pandas-dev/pandas/issues/29733
  524. # Check collections.abc.Mapping support as mapper for Series.map
  525. s = Series([1, 2, 3])
  526. not_a_dictionary = non_dict_mapping_subclass({3: "three"})
  527. result = s.map(not_a_dictionary)
  528. expected = Series([np.nan, np.nan, "three"])
  529. tm.assert_series_equal(result, expected)
  530. def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
  531. # https://github.com/pandas-dev/pandas/issues/29733
  532. # Check collections.abc.Mapping support as mapper for Series.map
  533. class NonDictMappingWithMissing(non_dict_mapping_subclass):
  534. def __missing__(self, key):
  535. return "missing"
  536. s = Series([1, 2, 3])
  537. not_a_dictionary = NonDictMappingWithMissing({3: "three"})
  538. result = s.map(not_a_dictionary)
  539. # __missing__ is a dict concept, not a Mapping concept,
  540. # so it should not change the result!
  541. expected = Series([np.nan, np.nan, "three"])
  542. tm.assert_series_equal(result, expected)
  543. def test_map_box():
  544. vals = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
  545. s = Series(vals)
  546. assert s.dtype == "datetime64[ns]"
  547. # boxed value must be Timestamp instance
  548. res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
  549. exp = Series(["Timestamp_1_None", "Timestamp_2_None"])
  550. tm.assert_series_equal(res, exp)
  551. vals = [
  552. pd.Timestamp("2011-01-01", tz="US/Eastern"),
  553. pd.Timestamp("2011-01-02", tz="US/Eastern"),
  554. ]
  555. s = Series(vals)
  556. assert s.dtype == "datetime64[ns, US/Eastern]"
  557. res = s.apply(lambda x: f"{type(x).__name__}_{x.day}_{x.tz}")
  558. exp = Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
  559. tm.assert_series_equal(res, exp)
  560. # timedelta
  561. vals = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
  562. s = Series(vals)
  563. assert s.dtype == "timedelta64[ns]"
  564. res = s.apply(lambda x: f"{type(x).__name__}_{x.days}")
  565. exp = Series(["Timedelta_1", "Timedelta_2"])
  566. tm.assert_series_equal(res, exp)
  567. # period
  568. vals = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
  569. s = Series(vals)
  570. assert s.dtype == "Period[M]"
  571. res = s.apply(lambda x: f"{type(x).__name__}_{x.freqstr}")
  572. exp = Series(["Period_M", "Period_M"])
  573. tm.assert_series_equal(res, exp)
  574. def test_map_categorical():
  575. values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
  576. s = Series(values, name="XX", index=list("abcdefg"))
  577. result = s.map(lambda x: x.lower())
  578. exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
  579. exp = Series(exp_values, name="XX", index=list("abcdefg"))
  580. tm.assert_series_equal(result, exp)
  581. tm.assert_categorical_equal(result.values, exp_values)
  582. result = s.map(lambda x: "A")
  583. exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
  584. tm.assert_series_equal(result, exp)
  585. assert result.dtype == object
  586. def test_map_datetimetz():
  587. values = pd.date_range("2011-01-01", "2011-01-02", freq="H").tz_localize(
  588. "Asia/Tokyo"
  589. )
  590. s = Series(values, name="XX")
  591. # keep tz
  592. result = s.map(lambda x: x + pd.offsets.Day())
  593. exp_values = pd.date_range("2011-01-02", "2011-01-03", freq="H").tz_localize(
  594. "Asia/Tokyo"
  595. )
  596. exp = Series(exp_values, name="XX")
  597. tm.assert_series_equal(result, exp)
  598. result = s.map(lambda x: x.hour)
  599. exp = Series(list(range(24)) + [0], name="XX", dtype=np.int32)
  600. tm.assert_series_equal(result, exp)
  601. # not vectorized
  602. def f(x):
  603. if not isinstance(x, pd.Timestamp):
  604. raise ValueError
  605. return str(x.tz)
  606. result = s.map(f)
  607. exp = Series(["Asia/Tokyo"] * 25, name="XX")
  608. tm.assert_series_equal(result, exp)
  609. @pytest.mark.parametrize(
  610. "vals,mapping,exp",
  611. [
  612. (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
  613. (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
  614. (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
  615. ],
  616. )
  617. def test_map_missing_mixed(vals, mapping, exp):
  618. # GH20495
  619. s = Series(vals + [np.nan])
  620. result = s.map(mapping)
  621. tm.assert_series_equal(result, Series(exp))
  622. @pytest.mark.parametrize(
  623. "dti,exp",
  624. [
  625. (
  626. Series([1, 2], index=pd.DatetimeIndex([0, 31536000000])),
  627. DataFrame(np.repeat([[1, 2]], 2, axis=0), dtype="int64"),
  628. ),
  629. (
  630. tm.makeTimeSeries(nper=30),
  631. DataFrame(np.repeat([[1, 2]], 30, axis=0), dtype="int64"),
  632. ),
  633. ],
  634. )
  635. @pytest.mark.parametrize("aware", [True, False])
  636. def test_apply_series_on_date_time_index_aware_series(dti, exp, aware):
  637. # GH 25959
  638. # Calling apply on a localized time series should not cause an error
  639. if aware:
  640. index = dti.tz_localize("UTC").index
  641. else:
  642. index = dti.index
  643. result = Series(index).apply(lambda x: Series([1, 2]))
  644. tm.assert_frame_equal(result, exp)
  645. def test_apply_scalar_on_date_time_index_aware_series():
  646. # GH 25959
  647. # Calling apply on a localized time series should not cause an error
  648. series = tm.makeTimeSeries(nper=30).tz_localize("UTC")
  649. result = Series(series.index).apply(lambda x: 1)
  650. tm.assert_series_equal(result, Series(np.ones(30), dtype="int64"))
  651. def test_map_float_to_string_precision():
  652. # GH 13228
  653. ser = Series(1 / 3)
  654. result = ser.map(lambda val: str(val)).to_dict()
  655. expected = {0: "0.3333333333333333"}
  656. assert result == expected
  657. def test_apply_to_timedelta():
  658. list_of_valid_strings = ["00:00:01", "00:00:02"]
  659. a = pd.to_timedelta(list_of_valid_strings)
  660. b = Series(list_of_valid_strings).apply(pd.to_timedelta)
  661. tm.assert_series_equal(Series(a), b)
  662. list_of_strings = ["00:00:01", np.nan, pd.NaT, pd.NaT]
  663. a = pd.to_timedelta(list_of_strings)
  664. ser = Series(list_of_strings)
  665. b = ser.apply(pd.to_timedelta)
  666. tm.assert_series_equal(Series(a), b)
  667. @pytest.mark.parametrize(
  668. "ops, names",
  669. [
  670. ([np.sum], ["sum"]),
  671. ([np.sum, np.mean], ["sum", "mean"]),
  672. (np.array([np.sum]), ["sum"]),
  673. (np.array([np.sum, np.mean]), ["sum", "mean"]),
  674. ],
  675. )
  676. @pytest.mark.parametrize("how", ["agg", "apply"])
  677. def test_apply_listlike_reducer(string_series, ops, names, how):
  678. # GH 39140
  679. expected = Series({name: op(string_series) for name, op in zip(names, ops)})
  680. expected.name = "series"
  681. result = getattr(string_series, how)(ops)
  682. tm.assert_series_equal(result, expected)
  683. @pytest.mark.parametrize(
  684. "ops",
  685. [
  686. {"A": np.sum},
  687. {"A": np.sum, "B": np.mean},
  688. Series({"A": np.sum}),
  689. Series({"A": np.sum, "B": np.mean}),
  690. ],
  691. )
  692. @pytest.mark.parametrize("how", ["agg", "apply"])
  693. def test_apply_dictlike_reducer(string_series, ops, how):
  694. # GH 39140
  695. expected = Series({name: op(string_series) for name, op in ops.items()})
  696. expected.name = string_series.name
  697. result = getattr(string_series, how)(ops)
  698. tm.assert_series_equal(result, expected)
  699. @pytest.mark.parametrize(
  700. "ops, names",
  701. [
  702. ([np.sqrt], ["sqrt"]),
  703. ([np.abs, np.sqrt], ["absolute", "sqrt"]),
  704. (np.array([np.sqrt]), ["sqrt"]),
  705. (np.array([np.abs, np.sqrt]), ["absolute", "sqrt"]),
  706. ],
  707. )
  708. def test_apply_listlike_transformer(string_series, ops, names):
  709. # GH 39140
  710. with np.errstate(all="ignore"):
  711. expected = concat([op(string_series) for op in ops], axis=1)
  712. expected.columns = names
  713. result = string_series.apply(ops)
  714. tm.assert_frame_equal(result, expected)
  715. @pytest.mark.parametrize(
  716. "ops",
  717. [
  718. {"A": np.sqrt},
  719. {"A": np.sqrt, "B": np.exp},
  720. Series({"A": np.sqrt}),
  721. Series({"A": np.sqrt, "B": np.exp}),
  722. ],
  723. )
  724. def test_apply_dictlike_transformer(string_series, ops):
  725. # GH 39140
  726. with np.errstate(all="ignore"):
  727. expected = concat({name: op(string_series) for name, op in ops.items()})
  728. expected.name = string_series.name
  729. result = string_series.apply(ops)
  730. tm.assert_series_equal(result, expected)
  731. def test_apply_retains_column_name():
  732. # GH 16380
  733. df = DataFrame({"x": range(3)}, Index(range(3), name="x"))
  734. result = df.x.apply(lambda x: Series(range(x + 1), Index(range(x + 1), name="y")))
  735. expected = DataFrame(
  736. [[0.0, np.nan, np.nan], [0.0, 1.0, np.nan], [0.0, 1.0, 2.0]],
  737. columns=Index(range(3), name="y"),
  738. index=Index(range(3), name="x"),
  739. )
  740. tm.assert_frame_equal(result, expected)
  741. def test_apply_type():
  742. # GH 46719
  743. s = Series([3, "string", float], index=["a", "b", "c"])
  744. result = s.apply(type)
  745. expected = Series([int, str, type], index=["a", "b", "c"])
  746. tm.assert_series_equal(result, expected)