# apply.py — pandas internal apply/aggregate/transform machinery.
# NOTE(review): the original paste carried a filename/size header and a fused
# run of display line numbers here (copy artifacts, not code); removed.
  1. from __future__ import annotations
  2. import abc
  3. from collections import defaultdict
  4. from contextlib import nullcontext
  5. from functools import partial
  6. import inspect
  7. from typing import (
  8. TYPE_CHECKING,
  9. Any,
  10. Callable,
  11. ContextManager,
  12. DefaultDict,
  13. Dict,
  14. Hashable,
  15. Iterable,
  16. Iterator,
  17. List,
  18. Sequence,
  19. cast,
  20. )
  21. import numpy as np
  22. from pandas._config import option_context
  23. from pandas._libs import lib
  24. from pandas._typing import (
  25. AggFuncType,
  26. AggFuncTypeBase,
  27. AggFuncTypeDict,
  28. AggObjType,
  29. Axis,
  30. AxisInt,
  31. NDFrameT,
  32. npt,
  33. )
  34. from pandas.errors import SpecificationError
  35. from pandas.util._decorators import cache_readonly
  36. from pandas.core.dtypes.cast import is_nested_object
  37. from pandas.core.dtypes.common import (
  38. is_dict_like,
  39. is_extension_array_dtype,
  40. is_list_like,
  41. is_sequence,
  42. )
  43. from pandas.core.dtypes.generic import (
  44. ABCDataFrame,
  45. ABCNDFrame,
  46. ABCSeries,
  47. )
  48. from pandas.core.algorithms import safe_sort
  49. from pandas.core.base import SelectionMixin
  50. import pandas.core.common as com
  51. from pandas.core.construction import ensure_wrapped_if_datetimelike
  52. if TYPE_CHECKING:
  53. from pandas import (
  54. DataFrame,
  55. Index,
  56. Series,
  57. )
  58. from pandas.core.groupby import GroupBy
  59. from pandas.core.resample import Resampler
  60. from pandas.core.window.rolling import BaseWindow
  61. ResType = Dict[int, Any]
  62. def frame_apply(
  63. obj: DataFrame,
  64. func: AggFuncType,
  65. axis: Axis = 0,
  66. raw: bool = False,
  67. result_type: str | None = None,
  68. args=None,
  69. kwargs=None,
  70. ) -> FrameApply:
  71. """construct and return a row or column based frame apply object"""
  72. axis = obj._get_axis_number(axis)
  73. klass: type[FrameApply]
  74. if axis == 0:
  75. klass = FrameRowApply
  76. elif axis == 1:
  77. klass = FrameColumnApply
  78. return klass(
  79. obj,
  80. func,
  81. raw=raw,
  82. result_type=result_type,
  83. args=args,
  84. kwargs=kwargs,
  85. )
class Apply(metaclass=abc.ABCMeta):
    """
    Base class implementing the shared apply/agg/transform dispatch logic
    for Series/DataFrame and the groupby/resample/window variants.

    ``self.f`` holds the (possibly curried) user function; ``self.orig_f``
    keeps the original, uncurried one for fallback paths.
    """

    # set by concrete subclasses: 0 for row-wise, 1 for column-wise
    axis: AxisInt

    def __init__(
        self,
        obj: AggObjType,
        func,
        raw: bool,
        result_type: str | None,
        args,
        kwargs,
    ) -> None:
        self.obj = obj
        self.raw = raw
        # normalize missing args/kwargs to empty containers
        self.args = args or ()
        self.kwargs = kwargs or {}

        if result_type not in [None, "reduce", "broadcast", "expand"]:
            raise ValueError(
                "invalid value for result_type, must be one "
                "of {None, 'reduce', 'broadcast', 'expand'}"
            )

        self.result_type = result_type

        # curry if needed: fold extra args/kwargs into the callable so the
        # rest of the pipeline can invoke it with a single argument.
        # ufunc/str/list-like funcs are dispatched differently and stay as-is.
        if (
            (kwargs or args)
            and not isinstance(func, (np.ufunc, str))
            and not is_list_like(func)
        ):

            def f(x):
                return func(x, *args, **kwargs)

        else:
            f = func

        self.orig_f: AggFuncType = func
        self.f: AggFuncType = f

    @abc.abstractmethod
    def apply(self) -> DataFrame | Series:
        # implemented by concrete subclasses
        pass

    def agg(self) -> DataFrame | Series | None:
        """
        Provide an implementation for the aggregators.

        Returns
        -------
        Result of aggregation, or None if agg cannot be performed by
        this method.
        """
        obj = self.obj
        arg = self.f
        args = self.args
        kwargs = self.kwargs

        if isinstance(arg, str):
            return self.apply_str()

        if is_dict_like(arg):
            return self.agg_dict_like()
        elif is_list_like(arg):
            # we require a list, but not a 'str'
            return self.agg_list_like()

        if callable(arg):
            f = com.get_cython_func(arg)
            if f and not args and not kwargs:
                # e.g. np.sum -> obj.sum(): prefer the builtin method
                return getattr(obj, f)()

        # caller can react
        return None

    def transform(self) -> DataFrame | Series:
        """
        Transform a DataFrame or Series.

        Returns
        -------
        DataFrame or Series
            Result of applying ``func`` along the given axis of the
            Series or DataFrame.

        Raises
        ------
        ValueError
            If the transform function fails or does not transform.
        """
        obj = self.obj
        func = self.orig_f
        axis = self.axis
        args = self.args
        kwargs = self.kwargs

        is_series = obj.ndim == 1

        if obj._get_axis_number(axis) == 1:
            # transform along columns via double transpose
            assert not is_series
            return obj.T.transform(func, 0, *args, **kwargs).T

        if is_list_like(func) and not is_dict_like(func):
            func = cast(List[AggFuncTypeBase], func)
            # Convert func equivalent dict
            if is_series:
                func = {com.get_callable_name(v) or v: v for v in func}
            else:
                func = {col: func for col in obj}

        if is_dict_like(func):
            func = cast(AggFuncTypeDict, func)
            return self.transform_dict_like(func)

        # func is either str or callable
        func = cast(AggFuncTypeBase, func)
        try:
            result = self.transform_str_or_callable(func)
        except TypeError:
            # TypeErrors from the UDF are surfaced unchanged
            raise
        except Exception as err:
            raise ValueError("Transform function failed") from err

        # Functions that transform may return empty Series/DataFrame
        # when the dtype is not appropriate
        if (
            isinstance(result, (ABCSeries, ABCDataFrame))
            and result.empty
            and not obj.empty
        ):
            raise ValueError("Transform function failed")
        # error: Argument 1 to "__get__" of "AxisProperty" has incompatible type
        # "Union[Series, DataFrame, GroupBy[Any], SeriesGroupBy,
        # DataFrameGroupBy, BaseWindow, Resampler]"; expected "Union[DataFrame,
        # Series]"
        if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
            obj.index  # type:ignore[arg-type]
        ):
            raise ValueError("Function did not transform")

        return result

    def transform_dict_like(self, func):
        """
        Compute transform in the case of a dict-like func
        """
        from pandas.core.reshape.concat import concat

        obj = self.obj
        args = self.args
        kwargs = self.kwargs

        # transform is currently only for Series/DataFrame
        assert isinstance(obj, ABCNDFrame)

        if len(func) == 0:
            raise ValueError("No transform functions were provided")

        func = self.normalize_dictlike_arg("transform", obj, func)

        # transform each selected column independently, then glue the
        # per-column results back together side by side
        results: dict[Hashable, DataFrame | Series] = {}
        for name, how in func.items():
            colg = obj._gotitem(name, ndim=1)
            results[name] = colg.transform(how, 0, *args, **kwargs)
        return concat(results, axis=1)

    def transform_str_or_callable(self, func) -> DataFrame | Series:
        """
        Compute transform in the case of a string or callable func
        """
        obj = self.obj
        args = self.args
        kwargs = self.kwargs

        if isinstance(func, str):
            return self._try_aggregate_string_function(obj, func, *args, **kwargs)

        if not args and not kwargs:
            f = com.get_cython_func(func)
            if f:
                # use the cythonized method when an equivalent exists
                return getattr(obj, f)()

        # Two possible ways to use a UDF - apply or call directly
        try:
            return obj.apply(func, args=args, **kwargs)
        except Exception:
            return func(obj, *args, **kwargs)

    def agg_list_like(self) -> DataFrame | Series:
        """
        Compute aggregation in the case of a list-like argument.

        Returns
        -------
        Result of aggregation.
        """
        from pandas.core.groupby.generic import (
            DataFrameGroupBy,
            SeriesGroupBy,
        )
        from pandas.core.reshape.concat import concat

        obj = self.obj
        arg = cast(List[AggFuncTypeBase], self.f)

        if getattr(obj, "axis", 0) == 1:
            raise NotImplementedError("axis other than 0 is not supported")

        if not isinstance(obj, SelectionMixin):
            # i.e. obj is Series or DataFrame
            selected_obj = obj
        elif obj._selected_obj.ndim == 1:
            # For SeriesGroupBy this matches _obj_with_exclusions
            selected_obj = obj._selected_obj
        else:
            selected_obj = obj._obj_with_exclusions

        results = []
        keys = []

        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
        context_manager: ContextManager
        if is_groupby:
            # When as_index=False, we combine all results using indices
            # and adjust index after
            context_manager = com.temp_setattr(obj, "as_index", True)
        else:
            context_manager = nullcontext()
        with context_manager:
            # degenerate case: a single column/Series — apply each func to it
            if selected_obj.ndim == 1:
                for a in arg:
                    colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
                    if isinstance(colg, (ABCSeries, ABCDataFrame)):
                        new_res = colg.aggregate(
                            a, self.axis, *self.args, **self.kwargs
                        )
                    else:
                        new_res = colg.aggregate(a, *self.args, **self.kwargs)
                    results.append(new_res)

                    # make sure we find a good name
                    name = com.get_callable_name(a) or a
                    keys.append(name)

            else:
                # multiple columns: apply the whole list to each column
                indices = []
                for index, col in enumerate(selected_obj):
                    colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
                    if isinstance(colg, (ABCSeries, ABCDataFrame)):
                        new_res = colg.aggregate(
                            arg, self.axis, *self.args, **self.kwargs
                        )
                    else:
                        new_res = colg.aggregate(arg, *self.args, **self.kwargs)
                    results.append(new_res)
                    indices.append(index)
                keys = selected_obj.columns.take(indices)

        try:
            return concat(results, keys=keys, axis=1, sort=False)
        except TypeError as err:
            # we are concatting non-NDFrame objects,
            # e.g. a list of scalars
            from pandas import Series

            result = Series(results, index=keys, name=obj.name)
            if is_nested_object(result):
                raise ValueError(
                    "cannot combine transform and aggregation operations"
                ) from err
            return result

    def agg_dict_like(self) -> DataFrame | Series:
        """
        Compute aggregation in the case of a dict-like argument.

        Returns
        -------
        Result of aggregation.
        """
        from pandas import Index
        from pandas.core.groupby.generic import (
            DataFrameGroupBy,
            SeriesGroupBy,
        )
        from pandas.core.reshape.concat import concat

        obj = self.obj
        arg = cast(AggFuncTypeDict, self.f)

        if getattr(obj, "axis", 0) == 1:
            raise NotImplementedError("axis other than 0 is not supported")

        if not isinstance(obj, SelectionMixin):
            # i.e. obj is Series or DataFrame
            selected_obj = obj
            selection = None
        else:
            selected_obj = obj._selected_obj
            selection = obj._selection

        arg = self.normalize_dictlike_arg("agg", selected_obj, arg)

        is_groupby = isinstance(obj, (DataFrameGroupBy, SeriesGroupBy))
        context_manager: ContextManager
        if is_groupby:
            # When as_index=False, we combine all results using indices
            # and adjust index after
            context_manager = com.temp_setattr(obj, "as_index", True)
        else:
            context_manager = nullcontext()
        with context_manager:
            if selected_obj.ndim == 1:
                # key only used for output
                colg = obj._gotitem(selection, ndim=1)
                results = {key: colg.agg(how) for key, how in arg.items()}
            else:
                # key used for column selection and output
                results = {
                    key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()
                }

        # set the final keys
        keys = list(arg.keys())

        # Avoid making two isinstance calls in all and any below
        is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]

        # combine results
        if all(is_ndframe):
            keys_to_use: Iterable[Hashable]
            keys_to_use = [k for k in keys if not results[k].empty]
            # Have to check, if at least one DataFrame is not empty.
            keys_to_use = keys_to_use if keys_to_use != [] else keys
            if selected_obj.ndim == 2:
                # keys are columns, so we can preserve names
                ktu = Index(keys_to_use)
                ktu._set_names(selected_obj.columns.names)
                keys_to_use = ktu

            axis: AxisInt = 0 if isinstance(obj, ABCSeries) else 1
            result = concat(
                {k: results[k] for k in keys_to_use},
                axis=axis,
                keys=keys_to_use,
            )
        elif any(is_ndframe):
            # There is a mix of NDFrames and scalars
            raise ValueError(
                "cannot perform both aggregation "
                "and transformation operations "
                "simultaneously"
            )
        else:
            from pandas import Series

            # we have a dict of scalars
            # GH 36212 use name only if obj is a series
            if obj.ndim == 1:
                obj = cast("Series", obj)
                name = obj.name
            else:
                name = None

            result = Series(results, name=name)

        return result

    def apply_str(self) -> DataFrame | Series:
        """
        Compute apply in case of a string.

        Returns
        -------
        result: Series or DataFrame
        """
        # Caller is responsible for checking isinstance(self.f, str)
        f = cast(str, self.f)

        obj = self.obj

        # Support for `frame.transform('method')`
        # Some methods (shift, etc.) require the axis argument, others
        # don't, so inspect and insert if necessary.
        func = getattr(obj, f, None)
        if callable(func):
            sig = inspect.getfullargspec(func)
            arg_names = (*sig.args, *sig.kwonlyargs)
            if self.axis != 0 and (
                "axis" not in arg_names or f in ("corrwith", "skew")
            ):
                raise ValueError(f"Operation {f} does not support axis=1")
            if "axis" in arg_names:
                self.kwargs["axis"] = self.axis
        return self._try_aggregate_string_function(obj, f, *self.args, **self.kwargs)

    def apply_multiple(self) -> DataFrame | Series:
        """
        Compute apply in case of a list-like or dict-like.

        Returns
        -------
        result: Series, DataFrame, or None
            Result when self.f is a list-like or dict-like, None otherwise.
        """
        return self.obj.aggregate(self.f, self.axis, *self.args, **self.kwargs)

    def normalize_dictlike_arg(
        self, how: str, obj: DataFrame | Series, func: AggFuncTypeDict
    ) -> AggFuncTypeDict:
        """
        Handler for dict-like argument.

        Ensures that necessary columns exist if obj is a DataFrame, and
        that a nested renamer is not passed. Also normalizes to all lists
        when values consists of a mix of list and non-lists.
        """
        assert how in ("apply", "agg", "transform")

        # Can't use func.values(); wouldn't work for a Series
        if (
            how == "agg"
            and isinstance(obj, ABCSeries)
            and any(is_list_like(v) for _, v in func.items())
        ) or (any(is_dict_like(v) for _, v in func.items())):
            # GH 15931 - deprecation of renaming keys
            raise SpecificationError("nested renamer is not supported")

        if obj.ndim != 1:
            # Check for missing columns on a frame
            cols = set(func.keys()) - set(obj.columns)
            if len(cols) > 0:
                # sort for a deterministic error message
                cols_sorted = list(safe_sort(list(cols)))
                raise KeyError(f"Column(s) {cols_sorted} do not exist")

        aggregator_types = (list, tuple, dict)

        # if we have a dict of any non-scalars
        # eg. {'A' : ['mean']}, normalize all to
        # be list-likes
        # Cannot use func.values() because arg may be a Series
        if any(isinstance(x, aggregator_types) for _, x in func.items()):
            new_func: AggFuncTypeDict = {}
            for k, v in func.items():
                if not isinstance(v, aggregator_types):
                    new_func[k] = [v]
                else:
                    new_func[k] = v
            func = new_func
        return func

    def _try_aggregate_string_function(self, obj, arg: str, *args, **kwargs):
        """
        if arg is a string, then try to operate on it:
        - try to find a function (or attribute) on ourselves
        - try to find a numpy function
        - raise
        """
        assert isinstance(arg, str)

        f = getattr(obj, arg, None)
        if f is not None:
            if callable(f):
                return f(*args, **kwargs)

            # people may try to aggregate on a non-callable attribute
            # but don't let them think they can pass args to it
            assert len(args) == 0
            assert len([kwarg for kwarg in kwargs if kwarg not in ["axis"]]) == 0
            return f

        f = getattr(np, arg, None)
        if f is not None and hasattr(obj, "__array__"):
            # in particular exclude Window
            return f(obj, *args, **kwargs)

        raise AttributeError(
            f"'{arg}' is not a valid function for '{type(obj).__name__}' object"
        )
class NDFrameApply(Apply):
    """
    Methods shared by FrameApply and SeriesApply but
    not GroupByApply or ResamplerWindowApply
    """

    # narrowed from AggObjType: here obj is always a plain NDFrame
    obj: DataFrame | Series

    @property
    def index(self) -> Index:
        """Index of the underlying object."""
        return self.obj.index

    @property
    def agg_axis(self) -> Index:
        """The axis labels that aggregation results are aligned against."""
        return self.obj._get_agg_axis(self.axis)
class FrameApply(NDFrameApply):
    """
    Apply machinery for DataFrame; concrete axis handling lives in
    FrameRowApply (axis=0) and FrameColumnApply (axis=1).
    """

    obj: DataFrame

    # ---------------------------------------------------------------
    # Abstract Methods

    @property
    @abc.abstractmethod
    def result_index(self) -> Index:
        # index to assign to the wrapped result
        pass

    @property
    @abc.abstractmethod
    def result_columns(self) -> Index:
        # columns to assign to the wrapped result
        pass

    @property
    @abc.abstractmethod
    def series_generator(self) -> Iterator[Series]:
        # yields one Series per row/column, depending on axis
        pass

    @abc.abstractmethod
    def wrap_results_for_axis(
        self, results: ResType, res_index: Index
    ) -> DataFrame | Series:
        # shape the raw {position: result} dict into a Series/DataFrame
        pass

    # ---------------------------------------------------------------

    @property
    def res_columns(self) -> Index:
        return self.result_columns

    @property
    def columns(self) -> Index:
        return self.obj.columns

    @cache_readonly
    def values(self):
        # cached: the underlying ndarray of the frame
        return self.obj.values

    @cache_readonly
    def dtypes(self) -> Series:
        return self.obj.dtypes

    def apply(self) -> DataFrame | Series:
        """compute the results"""
        # dispatch to agg
        if is_list_like(self.f):
            return self.apply_multiple()

        # all empty
        if len(self.columns) == 0 and len(self.index) == 0:
            return self.apply_empty_result()

        # string dispatch
        if isinstance(self.f, str):
            return self.apply_str()

        # ufunc
        elif isinstance(self.f, np.ufunc):
            with np.errstate(all="ignore"):
                results = self.obj._mgr.apply("apply", func=self.f)
            # _constructor will retain self.index and self.columns
            return self.obj._constructor(data=results)

        # broadcasting
        if self.result_type == "broadcast":
            return self.apply_broadcast(self.obj)

        # one axis empty
        elif not all(self.obj.shape):
            return self.apply_empty_result()

        # raw
        elif self.raw:
            return self.apply_raw()

        return self.apply_standard()

    def agg(self):
        obj = self.obj
        axis = self.axis

        # TODO: Avoid having to change state
        # temporarily transpose so aggregation always runs along axis 0
        self.obj = self.obj if self.axis == 0 else self.obj.T
        self.axis = 0

        result = None
        try:
            result = super().agg()
        finally:
            # always restore the original state, even if agg raised
            self.obj = obj
            self.axis = axis

        if axis == 1:
            result = result.T if result is not None else result

        if result is None:
            # base-class agg declined; fall back to a plain apply of the
            # original (uncurried) function
            result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)

        return result

    def apply_empty_result(self):
        """
        we have an empty result; at least 1 axis is 0

        we will try to apply the function to an empty
        series in order to see if this is a reduction function
        """
        assert callable(self.f)

        # we are not asked to reduce or infer reduction
        # so just return a copy of the existing object
        if self.result_type not in ["reduce", None]:
            return self.obj.copy()

        # we may need to infer
        should_reduce = self.result_type == "reduce"

        from pandas import Series

        if not should_reduce:
            # probe: does the function reduce an empty Series to a scalar?
            try:
                if self.axis == 0:
                    r = self.f(Series([], dtype=np.float64))
                else:
                    r = self.f(Series(index=self.columns, dtype=np.float64))
            except Exception:
                pass
            else:
                should_reduce = not isinstance(r, Series)

        if should_reduce:
            if len(self.agg_axis):
                r = self.f(Series([], dtype=np.float64))
            else:
                r = np.nan

            return self.obj._constructor_sliced(r, index=self.agg_axis)
        else:
            return self.obj.copy()

    def apply_raw(self):
        """apply to the values as a numpy array"""

        def wrap_function(func):
            """
            Wrap user supplied function to work around numpy issue.

            see https://github.com/numpy/numpy/issues/8352
            """

            def wrapper(*args, **kwargs):
                result = func(*args, **kwargs)
                if isinstance(result, str):
                    # box str results so numpy doesn't split them per-char
                    result = np.array(result, dtype=object)
                return result

            return wrapper

        result = np.apply_along_axis(wrap_function(self.f), self.axis, self.values)

        # TODO: mixed type case
        if result.ndim == 2:
            return self.obj._constructor(result, index=self.index, columns=self.columns)
        else:
            return self.obj._constructor_sliced(result, index=self.agg_axis)

    def apply_broadcast(self, target: DataFrame) -> DataFrame:
        assert callable(self.f)

        result_values = np.empty_like(target.values)

        # axis which we want to compare compliance
        result_compare = target.shape[0]

        for i, col in enumerate(target.columns):
            res = self.f(target[col])
            ares = np.asarray(res).ndim

            # must be a scalar or 1d
            if ares > 1:
                raise ValueError("too many dims to broadcast")
            if ares == 1:
                # must match return dim
                if result_compare != len(res):
                    raise ValueError("cannot broadcast result")

            result_values[:, i] = res

        # we *always* preserve the original index / columns
        result = self.obj._constructor(
            result_values, index=target.index, columns=target.columns
        )
        return result

    def apply_standard(self):
        results, res_index = self.apply_series_generator()

        # wrap results
        return self.wrap_results(results, res_index)

    def apply_series_generator(self) -> tuple[ResType, Index]:
        assert callable(self.f)

        series_gen = self.series_generator
        res_index = self.result_index

        results = {}

        with option_context("mode.chained_assignment", None):
            for i, v in enumerate(series_gen):
                # ignore SettingWithCopy here in case the user mutates
                results[i] = self.f(v)
                if isinstance(results[i], ABCSeries):
                    # If we have a view on v, we need to make a copy because
                    # series_generator will swap out the underlying data
                    results[i] = results[i].copy(deep=False)

        return results, res_index

    def wrap_results(self, results: ResType, res_index: Index) -> DataFrame | Series:
        from pandas import Series

        # see if we can infer the results
        if len(results) > 0 and 0 in results and is_sequence(results[0]):
            return self.wrap_results_for_axis(results, res_index)

        # dict of scalars

        # the default dtype of an empty Series is `object`, but this
        # code can be hit by df.mean() where the result should have dtype
        # float64 even if it's an empty Series.
        constructor_sliced = self.obj._constructor_sliced
        if len(results) == 0 and constructor_sliced is Series:
            result = constructor_sliced(results, dtype=np.float64)
        else:
            result = constructor_sliced(results)
        result.index = res_index

        return result

    def apply_str(self) -> DataFrame | Series:
        # Caller is responsible for checking isinstance(self.f, str)
        # TODO: GH#39993 - Avoid special-casing by replacing with lambda
        if self.f == "size":
            # Special-cased because DataFrame.size returns a single scalar
            obj = self.obj
            value = obj.shape[self.axis]
            return obj._constructor_sliced(value, index=self.agg_axis)
        return super().apply_str()
class FrameRowApply(FrameApply):
    """Apply along axis=0: the function is fed each column as a Series."""

    axis: AxisInt = 0

    @property
    def series_generator(self):
        # one Series per column, in column order
        return (self.obj._ixs(i, axis=1) for i in range(len(self.columns)))

    @property
    def result_index(self) -> Index:
        return self.columns

    @property
    def result_columns(self) -> Index:
        return self.index

    def wrap_results_for_axis(
        self, results: ResType, res_index: Index
    ) -> DataFrame | Series:
        """return the results for the rows"""

        if self.result_type == "reduce":
            # e.g. test_apply_dict GH#8735
            res = self.obj._constructor_sliced(results)
            res.index = res_index
            return res

        elif self.result_type is None and all(
            isinstance(x, dict) for x in results.values()
        ):
            # Our operation was a to_dict op e.g.
            #  test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544
            res = self.obj._constructor_sliced(results)
            res.index = res_index
            return res

        try:
            result = self.obj._constructor(data=results)
        except ValueError as err:
            # NOTE: matching on the message text is brittle but deliberate
            # here — only this specific ragged-length failure falls back.
            if "All arrays must be of the same length" in str(err):
                # e.g. result = [[2, 3], [1.5], ['foo', 'bar']]
                #  see test_agg_listlike_result GH#29587
                res = self.obj._constructor_sliced(results)
                res.index = res_index
                return res
            else:
                raise

        if not isinstance(results[0], ABCSeries):
            if len(result.index) == len(self.res_columns):
                result.index = self.res_columns

        if len(result.columns) == len(res_index):
            result.columns = res_index

        return result
class FrameColumnApply(FrameApply):
    """Apply along axis=1: the function is fed each row as a Series."""

    axis: AxisInt = 1

    def apply_broadcast(self, target: DataFrame) -> DataFrame:
        # broadcast is implemented column-wise; transpose in and out
        result = super().apply_broadcast(target.T)
        return result.T

    @property
    def series_generator(self):
        values = self.values
        values = ensure_wrapped_if_datetimelike(values)
        assert len(values) > 0

        # We create one Series object, and will swap out the data inside
        # of it.  Kids: don't do this at home.
        ser = self.obj._ixs(0, axis=0)
        mgr = ser._mgr

        if is_extension_array_dtype(ser.dtype):
            # values will be incorrect for this block
            # TODO(EA2D): special case would be unnecessary with 2D EAs
            obj = self.obj
            for i in range(len(obj)):
                yield obj._ixs(i, axis=0)

        else:
            for arr, name in zip(values, self.index):
                # GH#35462 re-pin mgr in case setitem changed it
                ser._mgr = mgr
                mgr.set_values(arr)
                object.__setattr__(ser, "_name", name)
                yield ser

    @property
    def result_index(self) -> Index:
        return self.index

    @property
    def result_columns(self) -> Index:
        return self.columns

    def wrap_results_for_axis(
        self, results: ResType, res_index: Index
    ) -> DataFrame | Series:
        """return the results for the columns"""
        result: DataFrame | Series

        # we have requested to expand
        if self.result_type == "expand":
            result = self.infer_to_same_shape(results, res_index)

        # we have a non-series and don't want inference
        elif not isinstance(results[0], ABCSeries):
            result = self.obj._constructor_sliced(results)
            result.index = res_index

        # we may want to infer results
        else:
            result = self.infer_to_same_shape(results, res_index)

        return result

    def infer_to_same_shape(self, results: ResType, res_index: Index) -> DataFrame:
        """infer the results to the same shape as the input object"""
        result = self.obj._constructor(data=results)
        result = result.T

        # set the index
        result.index = res_index

        # infer dtypes
        result = result.infer_objects(copy=False)

        return result
class SeriesApply(NDFrameApply):
    obj: Series
    axis: AxisInt = 0

    def __init__(
        self,
        obj: Series,
        func: AggFuncType,
        convert_dtype: bool,
        args,
        kwargs,
    ) -> None:
        # Whether ``lib.map_infer`` should try to find a better dtype for the
        # mapped result (only consulted on the non-EA path in apply_standard).
        self.convert_dtype = convert_dtype

        super().__init__(
            obj,
            func,
            raw=False,
            result_type=None,
            args=args,
            kwargs=kwargs,
        )

    def apply(self) -> DataFrame | Series:
        """Dispatch to the appropriate specialized apply path based on ``self.f``."""
        obj = self.obj

        if len(obj) == 0:
            return self.apply_empty_result()

        # dispatch to agg
        if is_list_like(self.f):
            return self.apply_multiple()

        if isinstance(self.f, str):
            # if we are a string, try to dispatch
            return self.apply_str()

        # self.f is Callable
        return self.apply_standard()

    def agg(self):
        # Defer to the shared agg machinery first; fall back to callable
        # handling only if it declined (returned None).
        result = super().agg()
        if result is None:
            f = self.f
            kwargs = self.kwargs

            # string, list-like, and dict-like are entirely handled in super
            assert callable(f)

            # we can be called from an inner function which
            # passes this meta-data
            kwargs.pop("_level", None)

            # try a regular apply, this evaluates lambdas
            # row-by-row; however if the lambda is expecting a Series
            # expression, e.g.: lambda x: x - x.quantile(0.25)
            # this will fail, so we can try a vectorized evaluation

            # we cannot FIRST try the vectorized evaluation, because
            # then .agg and .apply would have different semantics if the
            # operation is actually defined on the Series, e.g. str
            try:
                result = self.obj.apply(f)
            except (ValueError, AttributeError, TypeError):
                result = f(self.obj)

        return result

    def apply_empty_result(self) -> Series:
        # Empty input: return an empty Series with the same dtype/index
        # without ever calling ``f``.
        obj = self.obj
        return obj._constructor(dtype=obj.dtype, index=obj.index).__finalize__(
            obj, method="apply"
        )

    def apply_standard(self) -> DataFrame | Series:
        # caller is responsible for ensuring that f is Callable
        f = cast(Callable, self.f)
        obj = self.obj

        with np.errstate(all="ignore"):
            if isinstance(f, np.ufunc):
                # ufuncs operate on the whole Series at once
                return f(obj)

            # row-wise access
            if is_extension_array_dtype(obj.dtype) and hasattr(obj._values, "map"):
                # GH#23179 some EAs do not have `map`
                mapped = obj._values.map(f)
            else:
                values = obj.astype(object)._values
                mapped = lib.map_infer(
                    values,
                    f,
                    convert=self.convert_dtype,
                )

        if len(mapped) and isinstance(mapped[0], ABCSeries):
            # GH#43986 Need to do list(mapped) in order to get treated as nested
            # See also GH#25959 regarding EA support
            return obj._constructor_expanddim(list(mapped), index=obj.index)
        else:
            return obj._constructor(mapped, index=obj.index).__finalize__(
                obj, method="apply"
            )
  884. class GroupByApply(Apply):
  885. def __init__(
  886. self,
  887. obj: GroupBy[NDFrameT],
  888. func: AggFuncType,
  889. args,
  890. kwargs,
  891. ) -> None:
  892. kwargs = kwargs.copy()
  893. self.axis = obj.obj._get_axis_number(kwargs.get("axis", 0))
  894. super().__init__(
  895. obj,
  896. func,
  897. raw=False,
  898. result_type=None,
  899. args=args,
  900. kwargs=kwargs,
  901. )
  902. def apply(self):
  903. raise NotImplementedError
  904. def transform(self):
  905. raise NotImplementedError
  906. class ResamplerWindowApply(Apply):
  907. axis: AxisInt = 0
  908. obj: Resampler | BaseWindow
  909. def __init__(
  910. self,
  911. obj: Resampler | BaseWindow,
  912. func: AggFuncType,
  913. args,
  914. kwargs,
  915. ) -> None:
  916. super().__init__(
  917. obj,
  918. func,
  919. raw=False,
  920. result_type=None,
  921. args=args,
  922. kwargs=kwargs,
  923. )
  924. def apply(self):
  925. raise NotImplementedError
  926. def transform(self):
  927. raise NotImplementedError
  928. def reconstruct_func(
  929. func: AggFuncType | None, **kwargs
  930. ) -> tuple[bool, AggFuncType | None, list[str] | None, npt.NDArray[np.intp] | None]:
  931. """
  932. This is the internal function to reconstruct func given if there is relabeling
  933. or not and also normalize the keyword to get new order of columns.
  934. If named aggregation is applied, `func` will be None, and kwargs contains the
  935. column and aggregation function information to be parsed;
  936. If named aggregation is not applied, `func` is either string (e.g. 'min') or
  937. Callable, or list of them (e.g. ['min', np.max]), or the dictionary of column name
  938. and str/Callable/list of them (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]})
  939. If relabeling is True, will return relabeling, reconstructed func, column
  940. names, and the reconstructed order of columns.
  941. If relabeling is False, the columns and order will be None.
  942. Parameters
  943. ----------
  944. func: agg function (e.g. 'min' or Callable) or list of agg functions
  945. (e.g. ['min', np.max]) or dictionary (e.g. {'A': ['min', np.max]}).
  946. **kwargs: dict, kwargs used in is_multi_agg_with_relabel and
  947. normalize_keyword_aggregation function for relabelling
  948. Returns
  949. -------
  950. relabelling: bool, if there is relabelling or not
  951. func: normalized and mangled func
  952. columns: list of column names
  953. order: array of columns indices
  954. Examples
  955. --------
  956. >>> reconstruct_func(None, **{"foo": ("col", "min")})
  957. (True, defaultdict(<class 'list'>, {'col': ['min']}), ('foo',), array([0]))
  958. >>> reconstruct_func("min")
  959. (False, 'min', None, None)
  960. """
  961. relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
  962. columns: list[str] | None = None
  963. order: npt.NDArray[np.intp] | None = None
  964. if not relabeling:
  965. if isinstance(func, list) and len(func) > len(set(func)):
  966. # GH 28426 will raise error if duplicated function names are used and
  967. # there is no reassigned name
  968. raise SpecificationError(
  969. "Function names must be unique if there is no new column names "
  970. "assigned"
  971. )
  972. if func is None:
  973. # nicer error message
  974. raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
  975. if relabeling:
  976. func, columns, order = normalize_keyword_aggregation(kwargs)
  977. return relabeling, func, columns, order
  978. def is_multi_agg_with_relabel(**kwargs) -> bool:
  979. """
  980. Check whether kwargs passed to .agg look like multi-agg with relabeling.
  981. Parameters
  982. ----------
  983. **kwargs : dict
  984. Returns
  985. -------
  986. bool
  987. Examples
  988. --------
  989. >>> is_multi_agg_with_relabel(a="max")
  990. False
  991. >>> is_multi_agg_with_relabel(a_max=("a", "max"), a_min=("a", "min"))
  992. True
  993. >>> is_multi_agg_with_relabel()
  994. False
  995. """
  996. return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and (
  997. len(kwargs) > 0
  998. )
  999. def normalize_keyword_aggregation(
  1000. kwargs: dict,
  1001. ) -> tuple[dict, list[str], npt.NDArray[np.intp]]:
  1002. """
  1003. Normalize user-provided "named aggregation" kwargs.
  1004. Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs
  1005. to the old Dict[str, List[scalar]]].
  1006. Parameters
  1007. ----------
  1008. kwargs : dict
  1009. Returns
  1010. -------
  1011. aggspec : dict
  1012. The transformed kwargs.
  1013. columns : List[str]
  1014. The user-provided keys.
  1015. col_idx_order : List[int]
  1016. List of columns indices.
  1017. Examples
  1018. --------
  1019. >>> normalize_keyword_aggregation({"output": ("input", "sum")})
  1020. (defaultdict(<class 'list'>, {'input': ['sum']}), ('output',), array([0]))
  1021. """
  1022. from pandas.core.indexes.base import Index
  1023. # Normalize the aggregation functions as Mapping[column, List[func]],
  1024. # process normally, then fixup the names.
  1025. # TODO: aggspec type: typing.Dict[str, List[AggScalar]]
  1026. # May be hitting https://github.com/python/mypy/issues/5958
  1027. # saying it doesn't have an attribute __name__
  1028. aggspec: DefaultDict = defaultdict(list)
  1029. order = []
  1030. columns, pairs = list(zip(*kwargs.items()))
  1031. for column, aggfunc in pairs:
  1032. aggspec[column].append(aggfunc)
  1033. order.append((column, com.get_callable_name(aggfunc) or aggfunc))
  1034. # uniquify aggfunc name if duplicated in order list
  1035. uniquified_order = _make_unique_kwarg_list(order)
  1036. # GH 25719, due to aggspec will change the order of assigned columns in aggregation
  1037. # uniquified_aggspec will store uniquified order list and will compare it with order
  1038. # based on index
  1039. aggspec_order = [
  1040. (column, com.get_callable_name(aggfunc) or aggfunc)
  1041. for column, aggfuncs in aggspec.items()
  1042. for aggfunc in aggfuncs
  1043. ]
  1044. uniquified_aggspec = _make_unique_kwarg_list(aggspec_order)
  1045. # get the new index of columns by comparison
  1046. col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order)
  1047. return aggspec, columns, col_idx_order
  1048. def _make_unique_kwarg_list(
  1049. seq: Sequence[tuple[Any, Any]]
  1050. ) -> Sequence[tuple[Any, Any]]:
  1051. """
  1052. Uniquify aggfunc name of the pairs in the order list
  1053. Examples:
  1054. --------
  1055. >>> kwarg_list = [('a', '<lambda>'), ('a', '<lambda>'), ('b', '<lambda>')]
  1056. >>> _make_unique_kwarg_list(kwarg_list)
  1057. [('a', '<lambda>_0'), ('a', '<lambda>_1'), ('b', '<lambda>')]
  1058. """
  1059. return [
  1060. (pair[0], f"{pair[1]}_{seq[:i].count(pair)}") if seq.count(pair) > 1 else pair
  1061. for i, pair in enumerate(seq)
  1062. ]
def relabel_result(
    result: DataFrame | Series,
    func: dict[str, list[Callable | str]],
    columns: Iterable[Hashable],
    order: Iterable[int],
) -> dict[Hashable, Series]:
    """
    Internal function to reorder result if relabelling is True for
    dataframe.agg, and return the reordered result in dict.

    Parameters:
    ----------
    result: Result from aggregation
    func: Dict of (column name, funcs)
    columns: New columns name for relabelling
    order: New order for relabelling

    Returns
    -------
    dict mapping each original column to a Series indexed by the
    user-provided relabelled names.

    Examples:
    ---------
    >>> result = DataFrame({"A": [np.nan, 2, np.nan],
    ...       "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]})  # doctest: +SKIP
    >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
    >>> columns = ("foo", "aab", "bar", "dat")
    >>> order = [0, 1, 2, 3]
    >>> _relabel_result(result, func, columns, order)  # doctest: +SKIP
    dict(A=Series([2.0, NaN, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
         C=Series([NaN, 6.0, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
         B=Series([NaN, NaN, 2.5, 4.0], index=["foo", "aab", "bar", "dat"]))
    """
    from pandas.core.indexes.base import Index

    # The relabelled names sorted into the user-requested order.
    reordered_indexes = [
        pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
    ]
    reordered_result_in_dict: dict[Hashable, Series] = {}
    idx = 0

    # Only reorder when there are multiple result columns; a single column
    # keeps the order given in ``func`` (see long comment below).
    reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1
    for col, fun in func.items():
        s = result[col].dropna()

        # In the `_aggregate`, the callable names are obtained and used in `result`, and
        # these names are ordered alphabetically. e.g.
        #           C2   C1
        # <lambda>   1  NaN
        # amax     NaN  4.0
        # max      NaN  4.0
        # sum     18.0  6.0
        # Therefore, the order of functions for each column could be shuffled
        # accordingly so need to get the callable name if it is not parsed names, and
        # reorder the aggregated result for each column.
        # e.g. if df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))), correct order is
        # [sum, <lambda>], but in `result`, it will be [<lambda>, sum], and we need to
        # reorder so that aggregated values map to their functions regarding the order.

        # However there is only one column being used for aggregation, not need to
        # reorder since the index is not sorted, and keep as is in `funcs`, e.g.
        #         A
        # min   1.0
        # mean  1.5
        # mean  1.5
        if reorder_mask:
            fun = [
                com.get_callable_name(f) if not isinstance(f, str) else f for f in fun
            ]
            col_idx_order = Index(s.index).get_indexer(fun)
            s = s[col_idx_order]

        # assign the new user-provided "named aggregation" as index names, and reindex
        # it based on the whole user-provided names.
        s.index = reordered_indexes[idx : idx + len(fun)]
        reordered_result_in_dict[col] = s.reindex(columns, copy=False)
        idx = idx + len(fun)
    return reordered_result_in_dict
  1130. # TODO: Can't use, because mypy doesn't like us setting __name__
  1131. # error: "partial[Any]" has no attribute "__name__"
  1132. # the type is:
  1133. # typing.Sequence[Callable[..., ScalarResult]]
  1134. # -> typing.Sequence[Callable[..., ScalarResult]]:
  1135. def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
  1136. """
  1137. Possibly mangle a list of aggfuncs.
  1138. Parameters
  1139. ----------
  1140. aggfuncs : Sequence
  1141. Returns
  1142. -------
  1143. mangled: list-like
  1144. A new AggSpec sequence, where lambdas have been converted
  1145. to have unique names.
  1146. Notes
  1147. -----
  1148. If just one aggfunc is passed, the name will not be mangled.
  1149. """
  1150. if len(aggfuncs) <= 1:
  1151. # don't mangle for .agg([lambda x: .])
  1152. return aggfuncs
  1153. i = 0
  1154. mangled_aggfuncs = []
  1155. for aggfunc in aggfuncs:
  1156. if com.get_callable_name(aggfunc) == "<lambda>":
  1157. aggfunc = partial(aggfunc)
  1158. aggfunc.__name__ = f"<lambda_{i}>"
  1159. i += 1
  1160. mangled_aggfuncs.append(aggfunc)
  1161. return mangled_aggfuncs
  1162. def maybe_mangle_lambdas(agg_spec: Any) -> Any:
  1163. """
  1164. Make new lambdas with unique names.
  1165. Parameters
  1166. ----------
  1167. agg_spec : Any
  1168. An argument to GroupBy.agg.
  1169. Non-dict-like `agg_spec` are pass through as is.
  1170. For dict-like `agg_spec` a new spec is returned
  1171. with name-mangled lambdas.
  1172. Returns
  1173. -------
  1174. mangled : Any
  1175. Same type as the input.
  1176. Examples
  1177. --------
  1178. >>> maybe_mangle_lambdas('sum')
  1179. 'sum'
  1180. >>> maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP
  1181. [<function __main__.<lambda_0>,
  1182. <function pandas...._make_lambda.<locals>.f(*args, **kwargs)>]
  1183. """
  1184. is_dict = is_dict_like(agg_spec)
  1185. if not (is_dict or is_list_like(agg_spec)):
  1186. return agg_spec
  1187. mangled_aggspec = type(agg_spec)() # dict or OrderedDict
  1188. if is_dict:
  1189. for key, aggfuncs in agg_spec.items():
  1190. if is_list_like(aggfuncs) and not is_dict_like(aggfuncs):
  1191. mangled_aggfuncs = _managle_lambda_list(aggfuncs)
  1192. else:
  1193. mangled_aggfuncs = aggfuncs
  1194. mangled_aggspec[key] = mangled_aggfuncs
  1195. else:
  1196. mangled_aggspec = _managle_lambda_list(agg_spec)
  1197. return mangled_aggspec
  1198. def validate_func_kwargs(
  1199. kwargs: dict,
  1200. ) -> tuple[list[str], list[str | Callable[..., Any]]]:
  1201. """
  1202. Validates types of user-provided "named aggregation" kwargs.
  1203. `TypeError` is raised if aggfunc is not `str` or callable.
  1204. Parameters
  1205. ----------
  1206. kwargs : dict
  1207. Returns
  1208. -------
  1209. columns : List[str]
  1210. List of user-provied keys.
  1211. func : List[Union[str, callable[...,Any]]]
  1212. List of user-provided aggfuncs
  1213. Examples
  1214. --------
  1215. >>> validate_func_kwargs({'one': 'min', 'two': 'max'})
  1216. (['one', 'two'], ['min', 'max'])
  1217. """
  1218. tuple_given_message = "func is expected but received {} in **kwargs."
  1219. columns = list(kwargs)
  1220. func = []
  1221. for col_func in kwargs.values():
  1222. if not (isinstance(col_func, str) or callable(col_func)):
  1223. raise TypeError(tuple_given_message.format(type(col_func).__name__))
  1224. func.append(col_func)
  1225. if not columns:
  1226. no_arg_message = "Must provide 'func' or named aggregation **kwargs."
  1227. raise TypeError(no_arg_message)
  1228. return columns, func