123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451 |
- """
- Module that contains many useful utilities
- for validating data or function arguments
- """
- from __future__ import annotations
- from typing import (
- Iterable,
- Sequence,
- TypeVar,
- overload,
- )
- import numpy as np
- from pandas._libs import lib
- from pandas.core.dtypes.common import (
- is_bool,
- is_integer,
- )
- BoolishT = TypeVar("BoolishT", bool, int)
- BoolishNoneT = TypeVar("BoolishNoneT", bool, int, None)
- def _check_arg_length(fname, args, max_fname_arg_count, compat_args):
- """
- Checks whether 'args' has length of at most 'compat_args'. Raises
- a TypeError if that is not the case, similar to in Python when a
- function is called with too many arguments.
- """
- if max_fname_arg_count < 0:
- raise ValueError("'max_fname_arg_count' must be non-negative")
- if len(args) > len(compat_args):
- max_arg_count = len(compat_args) + max_fname_arg_count
- actual_arg_count = len(args) + max_fname_arg_count
- argument = "argument" if max_arg_count == 1 else "arguments"
- raise TypeError(
- f"{fname}() takes at most {max_arg_count} {argument} "
- f"({actual_arg_count} given)"
- )
- def _check_for_default_values(fname, arg_val_dict, compat_args):
- """
- Check that the keys in `arg_val_dict` are mapped to their
- default values as specified in `compat_args`.
- Note that this function is to be called only when it has been
- checked that arg_val_dict.keys() is a subset of compat_args
- """
- for key in arg_val_dict:
- # try checking equality directly with '=' operator,
- # as comparison may have been overridden for the left
- # hand object
- try:
- v1 = arg_val_dict[key]
- v2 = compat_args[key]
- # check for None-ness otherwise we could end up
- # comparing a numpy array vs None
- if (v1 is not None and v2 is None) or (v1 is None and v2 is not None):
- match = False
- else:
- match = v1 == v2
- if not is_bool(match):
- raise ValueError("'match' is not a boolean")
- # could not compare them directly, so try comparison
- # using the 'is' operator
- except ValueError:
- match = arg_val_dict[key] is compat_args[key]
- if not match:
- raise ValueError(
- f"the '{key}' parameter is not supported in "
- f"the pandas implementation of {fname}()"
- )
- def validate_args(fname, args, max_fname_arg_count, compat_args) -> None:
- """
- Checks whether the length of the `*args` argument passed into a function
- has at most `len(compat_args)` arguments and whether or not all of these
- elements in `args` are set to their default values.
- Parameters
- ----------
- fname : str
- The name of the function being passed the `*args` parameter
- args : tuple
- The `*args` parameter passed into a function
- max_fname_arg_count : int
- The maximum number of arguments that the function `fname`
- can accept, excluding those in `args`. Used for displaying
- appropriate error messages. Must be non-negative.
- compat_args : dict
- A dictionary of keys and their associated default values.
- In order to accommodate buggy behaviour in some versions of `numpy`,
- where a signature displayed keyword arguments but then passed those
- arguments **positionally** internally when calling downstream
- implementations, a dict ensures that the original
- order of the keyword arguments is enforced.
- Raises
- ------
- TypeError
- If `args` contains more values than there are `compat_args`
- ValueError
- If `args` contains values that do not correspond to those
- of the default values specified in `compat_args`
- """
- _check_arg_length(fname, args, max_fname_arg_count, compat_args)
- # We do this so that we can provide a more informative
- # error message about the parameters that we are not
- # supporting in the pandas implementation of 'fname'
- kwargs = dict(zip(compat_args, args))
- _check_for_default_values(fname, kwargs, compat_args)
- def _check_for_invalid_keys(fname, kwargs, compat_args):
- """
- Checks whether 'kwargs' contains any keys that are not
- in 'compat_args' and raises a TypeError if there is one.
- """
- # set(dict) --> set of the dictionary's keys
- diff = set(kwargs) - set(compat_args)
- if diff:
- bad_arg = list(diff)[0]
- raise TypeError(f"{fname}() got an unexpected keyword argument '{bad_arg}'")
- def validate_kwargs(fname, kwargs, compat_args) -> None:
- """
- Checks whether parameters passed to the **kwargs argument in a
- function `fname` are valid parameters as specified in `*compat_args`
- and whether or not they are set to their default values.
- Parameters
- ----------
- fname : str
- The name of the function being passed the `**kwargs` parameter
- kwargs : dict
- The `**kwargs` parameter passed into `fname`
- compat_args: dict
- A dictionary of keys that `kwargs` is allowed to have and their
- associated default values
- Raises
- ------
- TypeError if `kwargs` contains keys not in `compat_args`
- ValueError if `kwargs` contains keys in `compat_args` that do not
- map to the default values specified in `compat_args`
- """
- kwds = kwargs.copy()
- _check_for_invalid_keys(fname, kwargs, compat_args)
- _check_for_default_values(fname, kwds, compat_args)
- def validate_args_and_kwargs(
- fname, args, kwargs, max_fname_arg_count, compat_args
- ) -> None:
- """
- Checks whether parameters passed to the *args and **kwargs argument in a
- function `fname` are valid parameters as specified in `*compat_args`
- and whether or not they are set to their default values.
- Parameters
- ----------
- fname: str
- The name of the function being passed the `**kwargs` parameter
- args: tuple
- The `*args` parameter passed into a function
- kwargs: dict
- The `**kwargs` parameter passed into `fname`
- max_fname_arg_count: int
- The minimum number of arguments that the function `fname`
- requires, excluding those in `args`. Used for displaying
- appropriate error messages. Must be non-negative.
- compat_args: dict
- A dictionary of keys that `kwargs` is allowed to
- have and their associated default values.
- Raises
- ------
- TypeError if `args` contains more values than there are
- `compat_args` OR `kwargs` contains keys not in `compat_args`
- ValueError if `args` contains values not at the default value (`None`)
- `kwargs` contains keys in `compat_args` that do not map to the default
- value as specified in `compat_args`
- See Also
- --------
- validate_args : Purely args validation.
- validate_kwargs : Purely kwargs validation.
- """
- # Check that the total number of arguments passed in (i.e.
- # args and kwargs) does not exceed the length of compat_args
- _check_arg_length(
- fname, args + tuple(kwargs.values()), max_fname_arg_count, compat_args
- )
- # Check there is no overlap with the positional and keyword
- # arguments, similar to what is done in actual Python functions
- args_dict = dict(zip(compat_args, args))
- for key in args_dict:
- if key in kwargs:
- raise TypeError(
- f"{fname}() got multiple values for keyword argument '{key}'"
- )
- kwargs.update(args_dict)
- validate_kwargs(fname, kwargs, compat_args)
- def validate_bool_kwarg(
- value: BoolishNoneT, arg_name, none_allowed: bool = True, int_allowed: bool = False
- ) -> BoolishNoneT:
- """
- Ensure that argument passed in arg_name can be interpreted as boolean.
- Parameters
- ----------
- value : bool
- Value to be validated.
- arg_name : str
- Name of the argument. To be reflected in the error message.
- none_allowed : bool, default True
- Whether to consider None to be a valid boolean.
- int_allowed : bool, default False
- Whether to consider integer value to be a valid boolean.
- Returns
- -------
- value
- The same value as input.
- Raises
- ------
- ValueError
- If the value is not a valid boolean.
- """
- good_value = is_bool(value)
- if none_allowed:
- good_value = good_value or value is None
- if int_allowed:
- good_value = good_value or isinstance(value, int)
- if not good_value:
- raise ValueError(
- f'For argument "{arg_name}" expected type bool, received '
- f"type {type(value).__name__}."
- )
- return value
- def validate_fillna_kwargs(value, method, validate_scalar_dict_value: bool = True):
- """
- Validate the keyword arguments to 'fillna'.
- This checks that exactly one of 'value' and 'method' is specified.
- If 'method' is specified, this validates that it's a valid method.
- Parameters
- ----------
- value, method : object
- The 'value' and 'method' keyword arguments for 'fillna'.
- validate_scalar_dict_value : bool, default True
- Whether to validate that 'value' is a scalar or dict. Specifically,
- validate that it is not a list or tuple.
- Returns
- -------
- value, method : object
- """
- from pandas.core.missing import clean_fill_method
- if value is None and method is None:
- raise ValueError("Must specify a fill 'value' or 'method'.")
- if value is None and method is not None:
- method = clean_fill_method(method)
- elif value is not None and method is None:
- if validate_scalar_dict_value and isinstance(value, (list, tuple)):
- raise TypeError(
- '"value" parameter must be a scalar or dict, but '
- f'you passed a "{type(value).__name__}"'
- )
- elif value is not None and method is not None:
- raise ValueError("Cannot specify both 'value' and 'method'.")
- return value, method
- def validate_percentile(q: float | Iterable[float]) -> np.ndarray:
- """
- Validate percentiles (used by describe and quantile).
- This function checks if the given float or iterable of floats is a valid percentile
- otherwise raises a ValueError.
- Parameters
- ----------
- q: float or iterable of floats
- A single percentile or an iterable of percentiles.
- Returns
- -------
- ndarray
- An ndarray of the percentiles if valid.
- Raises
- ------
- ValueError if percentiles are not in given interval([0, 1]).
- """
- q_arr = np.asarray(q)
- # Don't change this to an f-string. The string formatting
- # is too expensive for cases where we don't need it.
- msg = "percentiles should all be in the interval [0, 1]. Try {} instead."
- if q_arr.ndim == 0:
- if not 0 <= q_arr <= 1:
- raise ValueError(msg.format(q_arr / 100.0))
- else:
- if not all(0 <= qs <= 1 for qs in q_arr):
- raise ValueError(msg.format(q_arr / 100.0))
- return q_arr
- @overload
- def validate_ascending(ascending: BoolishT) -> BoolishT:
- ...
- @overload
- def validate_ascending(ascending: Sequence[BoolishT]) -> list[BoolishT]:
- ...
- def validate_ascending(
- ascending: bool | int | Sequence[BoolishT],
- ) -> bool | int | list[BoolishT]:
- """Validate ``ascending`` kwargs for ``sort_index`` method."""
- kwargs = {"none_allowed": False, "int_allowed": True}
- if not isinstance(ascending, Sequence):
- return validate_bool_kwarg(ascending, "ascending", **kwargs)
- return [validate_bool_kwarg(item, "ascending", **kwargs) for item in ascending]
- def validate_endpoints(closed: str | None) -> tuple[bool, bool]:
- """
- Check that the `closed` argument is among [None, "left", "right"]
- Parameters
- ----------
- closed : {None, "left", "right"}
- Returns
- -------
- left_closed : bool
- right_closed : bool
- Raises
- ------
- ValueError : if argument is not among valid values
- """
- left_closed = False
- right_closed = False
- if closed is None:
- left_closed = True
- right_closed = True
- elif closed == "left":
- left_closed = True
- elif closed == "right":
- right_closed = True
- else:
- raise ValueError("Closed has to be either 'left', 'right' or None")
- return left_closed, right_closed
- def validate_inclusive(inclusive: str | None) -> tuple[bool, bool]:
- """
- Check that the `inclusive` argument is among {"both", "neither", "left", "right"}.
- Parameters
- ----------
- inclusive : {"both", "neither", "left", "right"}
- Returns
- -------
- left_right_inclusive : tuple[bool, bool]
- Raises
- ------
- ValueError : if argument is not among valid values
- """
- left_right_inclusive: tuple[bool, bool] | None = None
- if isinstance(inclusive, str):
- left_right_inclusive = {
- "both": (True, True),
- "left": (True, False),
- "right": (False, True),
- "neither": (False, False),
- }.get(inclusive)
- if left_right_inclusive is None:
- raise ValueError(
- "Inclusive has to be either 'both', 'neither', 'left' or 'right'"
- )
- return left_right_inclusive
- def validate_insert_loc(loc: int, length: int) -> int:
- """
- Check that we have an integer between -length and length, inclusive.
- Standardize negative loc to within [0, length].
- The exceptions we raise on failure match np.insert.
- """
- if not is_integer(loc):
- raise TypeError(f"loc must be an integer between -{length} and {length}")
- if loc < 0:
- loc += length
- if not 0 <= loc <= length:
- raise IndexError(f"loc must be an integer between -{length} and {length}")
- return loc
- def check_dtype_backend(dtype_backend) -> None:
- if dtype_backend is not lib.no_default:
- if dtype_backend not in ["numpy_nullable", "pyarrow"]:
- raise ValueError(
- f"dtype_backend {dtype_backend} is invalid, only 'numpy_nullable' and "
- f"'pyarrow' are allowed.",
- )
|