_validators.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. """
  2. Module that contains many useful utilities
  3. for validating data or function arguments
  4. """
  5. from __future__ import annotations
  6. from typing import (
  7. Iterable,
  8. Sequence,
  9. TypeVar,
  10. overload,
  11. )
  12. import numpy as np
  13. from pandas._libs import lib
  14. from pandas.core.dtypes.common import (
  15. is_bool,
  16. is_integer,
  17. )
# Type variable constrained to ``bool`` or ``int``; used in this module's
# annotations so a validator's return type follows the type it was given.
BoolishT = TypeVar("BoolishT", bool, int)
# Same constraint set as ``BoolishT`` with ``None`` additionally permitted.
BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
  20. def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
  21. """
  22. Checks whether 'args' has length of at most 'compat_args'. Raises
  23. a TypeError if that is not the case, similar to in Python when a
  24. function is called with too many arguments.
  25. """
  26. if max_fname_arg_count < 0:
  27. raise ValueError("'max_fname_arg_count' must be non-negative")
  28. if len(args) > len(compat_args):
  29. max_arg_count = len(compat_args) + max_fname_arg_count
  30. actual_arg_count = len(args) + max_fname_arg_count
  31. argument = "argument" if max_arg_count == 1 else "arguments"
  32. raise TypeError(
  33. f"{fname}() takes at most {max_arg_count} {argument} "
  34. f"({actual_arg_count} given)"
  35. )
  36. def _check_for_default_values(fname, arg_val_dict, compat_args):
  37. """
  38. Check that the keys in `arg_val_dict` are mapped to their
  39. default values as specified in `compat_args`.
  40. Note that this function is to be called only when it has been
  41. checked that arg_val_dict.keys() is a subset of compat_args
  42. """
  43. for key in arg_val_dict:
  44. # try checking equality directly with '=' operator,
  45. # as comparison may have been overridden for the left
  46. # hand object
  47. try:
  48. v1 = arg_val_dict[key]
  49. v2 = compat_args[key]
  50. # check for None-ness otherwise we could end up
  51. # comparing a numpy array vs None
  52. if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
  53. match = False
  54. else:
  55. match = v1 == v2
  56. if not is_bool(match):
  57. raise ValueError("'match' is not a boolean")
  58. # could not compare them directly, so try comparison
  59. # using the 'is' operator
  60. except ValueError:
  61. match = arg_val_dict[key] is compat_args[key]
  62. if not match:
  63. raise ValueError(
  64. f"the '{key}' parameter is not supported in "
  65. f"the pandas implementation of {fname}()"
  66. )
  67. def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
  68. """
  69. Checks whether the length of the `*args` argument passed into a function
  70. has at most `len(compat_args)` arguments and whether or not all of these
  71. elements in `args` are set to their default values.
  72. Parameters
  73. ----------
  74. fname : str
  75. The name of the function being passed the `*args` parameter
  76. args : tuple
  77. The `*args` parameter passed into a function
  78. max_fname_arg_count : int
  79. The maximum number of arguments that the function `fname`
  80. can accept, excluding those in `args`. Used for displaying
  81. appropriate error messages. Must be non-negative.
  82. compat_args : dict
  83. A dictionary of keys and their associated default values.
  84. In order to accommodate buggy behaviour in some versions of `numpy`,
  85. where a signature displayed keyword arguments but then passed those
  86. arguments **positionally** internally when calling downstream
  87. implementations, a dict ensures that the original
  88. order of the keyword arguments is enforced.
  89. Raises
  90. ------
  91. TypeError
  92. If `args` contains more values than there are `compat_args`
  93. ValueError
  94. If `args` contains values that do not correspond to those
  95. of the default values specified in `compat_args`
  96. """
  97. _check_arg_length(fname, args, max_fname_arg_count, compat_args)
  98. # We do this so that we can provide a more informative
  99. # error message about the parameters that we are not
  100. # supporting in the pandas implementation of 'fname'
  101. kwargs = dict(zip(compat_args, args))
  102. _check_for_default_values(fname, kwargs, compat_args)
  103. def _check_for_invalid_keys(fname, kwargs, compat_args):
  104. """
  105. Checks whether 'kwargs' contains any keys that are not
  106. in 'compat_args' and raises a TypeError if there is one.
  107. """
  108. # set(dict) --> set of the dictionary's keys
  109. diff = set(kwargs) - set(compat_args)
  110. if diff:
  111. bad_arg = list(diff)[0]
  112. raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
  113. def validate_kwargs(fname, kwargs, compat_args) -> None:
  114. """
  115. Checks whether parameters passed to the **kwargs argument in a
  116. function `fname` are valid parameters as specified in `*compat_args`
  117. and whether or not they are set to their default values.
  118. Parameters
  119. ----------
  120. fname : str
  121. The name of the function being passed the `**kwargs` parameter
  122. kwargs : dict
  123. The `**kwargs` parameter passed into `fname`
  124. compat_args: dict
  125. A dictionary of keys that `kwargs` is allowed to have and their
  126. associated default values
  127. Raises
  128. ------
  129. TypeError if `kwargs` contains keys not in `compat_args`
  130. ValueError if `kwargs` contains keys in `compat_args` that do not
  131. map to the default values specified in `compat_args`
  132. """
  133. kwds = kwargs.copy()
  134. _check_for_invalid_keys(fname, kwargs, compat_args)
  135. _check_for_default_values(fname, kwds, compat_args)
  136. def validate_args_and_kwargs(
  137. fname, args, kwargs, max_fname_arg_count, compat_args
  138. ) -> None:
  139. """
  140. Checks whether parameters passed to the *args and **kwargs argument in a
  141. function `fname` are valid parameters as specified in `*compat_args`
  142. and whether or not they are set to their default values.
  143. Parameters
  144. ----------
  145. fname: str
  146. The name of the function being passed the `**kwargs` parameter
  147. args: tuple
  148. The `*args` parameter passed into a function
  149. kwargs: dict
  150. The `**kwargs` parameter passed into `fname`
  151. max_fname_arg_count: int
  152. The minimum number of arguments that the function `fname`
  153. requires, excluding those in `args`. Used for displaying
  154. appropriate error messages. Must be non-negative.
  155. compat_args: dict
  156. A dictionary of keys that `kwargs` is allowed to
  157. have and their associated default values.
  158. Raises
  159. ------
  160. TypeError if `args` contains more values than there are
  161. `compat_args` OR `kwargs` contains keys not in `compat_args`
  162. ValueError if `args` contains values not at the default value (`None`)
  163. `kwargs` contains keys in `compat_args` that do not map to the default
  164. value as specified in `compat_args`
  165. See Also
  166. --------
  167. validate_args : Purely args validation.
  168. validate_kwargs : Purely kwargs validation.
  169. """
  170. # Check that the total number of arguments passed in (i.e.
  171. # args and kwargs) does not exceed the length of compat_args
  172. _check_arg_length(
  173. fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
  174. )
  175. # Check there is no overlap with the positional and keyword
  176. # arguments, similar to what is done in actual Python functions
  177. args_dict = dict(zip(compat_args, args))
  178. for key in args_dict:
  179. if key in kwargs:
  180. raise TypeError(
  181. f"{fname}() got multiple values for keyword argument '{key}'"
  182. )
  183. kwargs.update(args_dict)
  184. validate_kwargs(fname, kwargs, compat_args)
  185. def validate_bool_kwarg(
  186. value: BoolishNoneT, arg_name, none_allowed: bool = True, int_allowed: bool = False
  187. ) -> BoolishNoneT:
  188. """
  189. Ensure that argument passed in arg_name can be interpreted as boolean.
  190. Parameters
  191. ----------
  192. value : bool
  193. Value to be validated.
  194. arg_name : str
  195. Name of the argument. To be reflected in the error message.
  196. none_allowed : bool, default True
  197. Whether to consider None to be a valid boolean.
  198. int_allowed : bool, default False
  199. Whether to consider integer value to be a valid boolean.
  200. Returns
  201. -------
  202. value
  203. The same value as input.
  204. Raises
  205. ------
  206. ValueError
  207. If the value is not a valid boolean.
  208. """
  209. good_value = is_bool(value)
  210. if none_allowed:
  211. good_value = good_value or value is None
  212. if int_allowed:
  213. good_value = good_value or isinstance(value, int)
  214. if not good_value:
  215. raise ValueError(
  216. f'For argument "{arg_name}" expected type bool, received '
  217. f"type {type(value).__name__}."
  218. )
  219. return value
  220. def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
  221. """
  222. Validate the keyword arguments to 'fillna'.
  223. This checks that exactly one of 'value' and 'method' is specified.
  224. If 'method' is specified, this validates that it's a valid method.
  225. Parameters
  226. ----------
  227. value, method : object
  228. The 'value' and 'method' keyword arguments for 'fillna'.
  229. validate_scalar_dict_value : bool, default True
  230. Whether to validate that 'value' is a scalar or dict. Specifically,
  231. validate that it is not a list or tuple.
  232. Returns
  233. -------
  234. value, method : object
  235. """
  236. from pandas.core.missing import clean_fill_method
  237. if value is None and method is None:
  238. raise ValueError("Must specify a fill 'value' or 'method'.")
  239. if value is None and method is not None:
  240. method = clean_fill_method(method)
  241. elif value is not None and method is None:
  242. if validate_scalar_dict_value and isinstance(value, (list, tuple)):
  243. raise TypeError(
  244. '"value" parameter must be a scalar or dict, but '
  245. f'you passed a "{type(value).__name__}"'
  246. )
  247. elif value is not None and method is not None:
  248. raise ValueError("Cannot specify both 'value' and 'method'.")
  249. return value, method
  250. def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
  251. """
  252. Validate percentiles (used by describe and quantile).
  253. This function checks if the given float or iterable of floats is a valid percentile
  254. otherwise raises a ValueError.
  255. Parameters
  256. ----------
  257. q: float or iterable of floats
  258. A single percentile or an iterable of percentiles.
  259. Returns
  260. -------
  261. ndarray
  262. An ndarray of the percentiles if valid.
  263. Raises
  264. ------
  265. ValueError if percentiles are not in given interval([0, 1]).
  266. """
  267. q_arr = np.asarray(q)
  268. # Don't change this to an f-string. The string formatting
  269. # is too expensive for cases where we don't need it.
  270. msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
  271. if q_arr.ndim == 0:
  272. if not 0 <= q_arr <= 1:
  273. raise ValueError(msg.format(q_arr / 100.0))
  274. else:
  275. if not all(0 <= qs <= 1 for qs in q_arr):
  276. raise ValueError(msg.format(q_arr / 100.0))
  277. return q_arr
  278. @overload
  279. def validate_ascending(ascending: BoolishT) -> BoolishT:
  280. ...
  281. @overload
  282. def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
  283. ...
  284. def validate_ascending(
  285. ascending: bool | int | Sequence[BoolishT],
  286. ) -> bool | int | list[BoolishT]:
  287. """Validate ``ascending`` kwargs for ``sort_index`` method."""
  288. kwargs = {"none_allowed": False, "int_allowed": True}
  289. if not isinstance(ascending, Sequence):
  290. return validate_bool_kwarg(ascending, "ascending", **kwargs)
  291. return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
  292. def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
  293. """
  294. Check that the `closed` argument is among [None, "left", "right"]
  295. Parameters
  296. ----------
  297. closed : {None, "left", "right"}
  298. Returns
  299. -------
  300. left_closed : bool
  301. right_closed : bool
  302. Raises
  303. ------
  304. ValueError : if argument is not among valid values
  305. """
  306. left_closed = False
  307. right_closed = False
  308. if closed is None:
  309. left_closed = True
  310. right_closed = True
  311. elif closed == "left":
  312. left_closed = True
  313. elif closed == "right":
  314. right_closed = True
  315. else:
  316. raise ValueError("Closed has to be either 'left', 'right' or None")
  317. return left_closed, right_closed
  318. def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
  319. """
  320. Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
  321. Parameters
  322. ----------
  323. inclusive : {"both", "neither", "left", "right"}
  324. Returns
  325. -------
  326. left_right_inclusive : tuple[bool, bool]
  327. Raises
  328. ------
  329. ValueError : if argument is not among valid values
  330. """
  331. left_right_inclusive: tuple[bool, bool] | None = None
  332. if isinstance(inclusive, str):
  333. left_right_inclusive = {
  334. "both": (True, True),
  335. "left": (True, False),
  336. "right": (False, True),
  337. "neither": (False, False),
  338. }.get(inclusive)
  339. if left_right_inclusive is None:
  340. raise ValueError(
  341. "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
  342. )
  343. return left_right_inclusive
  344. def validate_insert_loc(loc: int, length: int) -> int:
  345. """
  346. Check that we have an integer between -length and length, inclusive.
  347. Standardize negative loc to within [0, length].
  348. The exceptions we raise on failure match np.insert.
  349. """
  350. if not is_integer(loc):
  351. raise TypeError(f"loc must be an integer between -{length} and {length}")
  352. if loc < 0:
  353. loc += length
  354. if not 0 <= loc <= length:
  355. raise IndexError(f"loc must be an integer between -{length} and {length}")
  356. return loc
  357. def check_dtype_backend(dtype_backend) -> None:
  358. if dtype_backend is not lib.no_default:
  359. if dtype_backend not in ["numpy_nullable", "pyarrow"]:
  360. raise ValueError(
  361. f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
  362. f"'pyarrow' are allowed.",
  363. )