common.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. """
  2. Misc tools for implementing data structures
  3. Note: pandas.core.common is *not* part of the public API.
  4. """
  5. from __future__ import annotations
  6. import builtins
  7. from collections import (
  8. abc,
  9. defaultdict,
  10. )
  11. import contextlib
  12. from functools import partial
  13. import inspect
  14. from typing import (
  15. TYPE_CHECKING,
  16. Any,
  17. Callable,
  18. Collection,
  19. Generator,
  20. Hashable,
  21. Iterable,
  22. Sequence,
  23. cast,
  24. overload,
  25. )
  26. import warnings
  27. import numpy as np
  28. from pandas._libs import lib
  29. from pandas._typing import (
  30. AnyArrayLike,
  31. ArrayLike,
  32. NpDtype,
  33. RandomState,
  34. T,
  35. )
  36. from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
  37. from pandas.core.dtypes.common import (
  38. is_array_like,
  39. is_bool_dtype,
  40. is_extension_array_dtype,
  41. is_integer,
  42. )
  43. from pandas.core.dtypes.generic import (
  44. ABCExtensionArray,
  45. ABCIndex,
  46. ABCSeries,
  47. )
  48. from pandas.core.dtypes.inference import iterable_not_string
  49. from pandas.core.dtypes.missing import isna
  50. if TYPE_CHECKING:
  51. from pandas import Index
  52. def flatten(line):
  53. """
  54. Flatten an arbitrarily nested sequence.
  55. Parameters
  56. ----------
  57. line : sequence
  58. The non string sequence to flatten
  59. Notes
  60. -----
  61. This doesn't consider strings sequences.
  62. Returns
  63. -------
  64. flattened : generator
  65. """
  66. for element in line:
  67. if iterable_not_string(element):
  68. yield from flatten(element)
  69. else:
  70. yield element
  71. def consensus_name_attr(objs):
  72. name = objs[0].name
  73. for obj in objs[1:]:
  74. try:
  75. if obj.name != name:
  76. name = None
  77. except ValueError:
  78. name = None
  79. return name
  80. def is_bool_indexer(key: Any) -> bool:
  81. """
  82. Check whether `key` is a valid boolean indexer.
  83. Parameters
  84. ----------
  85. key : Any
  86. Only list-likes may be considered boolean indexers.
  87. All other types are not considered a boolean indexer.
  88. For array-like input, boolean ndarrays or ExtensionArrays
  89. with ``_is_boolean`` set are considered boolean indexers.
  90. Returns
  91. -------
  92. bool
  93. Whether `key` is a valid boolean indexer.
  94. Raises
  95. ------
  96. ValueError
  97. When the array is an object-dtype ndarray or ExtensionArray
  98. and contains missing values.
  99. See Also
  100. --------
  101. check_array_indexer : Check that `key` is a valid array to index,
  102. and convert to an ndarray.
  103. """
  104. if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
  105. is_array_like(key) and is_extension_array_dtype(key.dtype)
  106. ):
  107. if key.dtype == np.object_:
  108. key_array = np.asarray(key)
  109. if not lib.is_bool_array(key_array):
  110. na_msg = "Cannot mask with non-boolean array containing NA / NaN values"
  111. if lib.infer_dtype(key_array) == "boolean" and isna(key_array).any():
  112. # Don't raise on e.g. ["A", "B", np.nan], see
  113. # test_loc_getitem_list_of_labels_categoricalindex_with_na
  114. raise ValueError(na_msg)
  115. return False
  116. return True
  117. elif is_bool_dtype(key.dtype):
  118. return True
  119. elif isinstance(key, list):
  120. # check if np.array(key).dtype would be bool
  121. if len(key) > 0:
  122. if type(key) is not list:
  123. # GH#42461 cython will raise TypeError if we pass a subclass
  124. key = list(key)
  125. return lib.is_bool_list(key)
  126. return False
  127. def cast_scalar_indexer(val):
  128. """
  129. Disallow indexing with a float key, even if that key is a round number.
  130. Parameters
  131. ----------
  132. val : scalar
  133. Returns
  134. -------
  135. outval : scalar
  136. """
  137. # assumes lib.is_scalar(val)
  138. if lib.is_float(val) and val.is_integer():
  139. raise IndexError(
  140. # GH#34193
  141. "Indexing with a float is no longer supported. Manually convert "
  142. "to an integer key instead."
  143. )
  144. return val
  145. def not_none(*args):
  146. """
  147. Returns a generator consisting of the arguments that are not None.
  148. """
  149. return (arg for arg in args if arg is not None)
  150. def any_none(*args) -> bool:
  151. """
  152. Returns a boolean indicating if any argument is None.
  153. """
  154. return any(arg is None for arg in args)
  155. def all_none(*args) -> bool:
  156. """
  157. Returns a boolean indicating if all arguments are None.
  158. """
  159. return all(arg is None for arg in args)
  160. def any_not_none(*args) -> bool:
  161. """
  162. Returns a boolean indicating if any argument is not None.
  163. """
  164. return any(arg is not None for arg in args)
  165. def all_not_none(*args) -> bool:
  166. """
  167. Returns a boolean indicating if all arguments are not None.
  168. """
  169. return all(arg is not None for arg in args)
  170. def count_not_none(*args) -> int:
  171. """
  172. Returns the count of arguments that are not None.
  173. """
  174. return sum(x is not None for x in args)
  175. @overload
  176. def asarray_tuplesafe(
  177. values: ArrayLike | list | tuple | zip, dtype: NpDtype | None = ...
  178. ) -> np.ndarray:
  179. # ExtensionArray can only be returned when values is an Index, all other iterables
  180. # will return np.ndarray. Unfortunately "all other" cannot be encoded in a type
  181. # signature, so instead we special-case some common types.
  182. ...
  183. @overload
  184. def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = ...) -> ArrayLike:
  185. ...
  186. def asarray_tuplesafe(values: Iterable, dtype: NpDtype | None = None) -> ArrayLike:
  187. if not (isinstance(values, (list, tuple)) or hasattr(values, "__array__")):
  188. values = list(values)
  189. elif isinstance(values, ABCIndex):
  190. return values._values
  191. if isinstance(values, list) and dtype in [np.object_, object]:
  192. return construct_1d_object_array_from_listlike(values)
  193. try:
  194. with warnings.catch_warnings():
  195. # Can remove warning filter once NumPy 1.24 is min version
  196. warnings.simplefilter("ignore", np.VisibleDeprecationWarning)
  197. result = np.asarray(values, dtype=dtype)
  198. except ValueError:
  199. # Using try/except since it's more performant than checking is_list_like
  200. # over each element
  201. # error: Argument 1 to "construct_1d_object_array_from_listlike"
  202. # has incompatible type "Iterable[Any]"; expected "Sized"
  203. return construct_1d_object_array_from_listlike(values) # type: ignore[arg-type]
  204. if issubclass(result.dtype.type, str):
  205. result = np.asarray(values, dtype=object)
  206. if result.ndim == 2:
  207. # Avoid building an array of arrays:
  208. values = [tuple(x) for x in values]
  209. result = construct_1d_object_array_from_listlike(values)
  210. return result
  211. def index_labels_to_array(
  212. labels: np.ndarray | Iterable, dtype: NpDtype | None = None
  213. ) -> np.ndarray:
  214. """
  215. Transform label or iterable of labels to array, for use in Index.
  216. Parameters
  217. ----------
  218. dtype : dtype
  219. If specified, use as dtype of the resulting array, otherwise infer.
  220. Returns
  221. -------
  222. array
  223. """
  224. if isinstance(labels, (str, tuple)):
  225. labels = [labels]
  226. if not isinstance(labels, (list, np.ndarray)):
  227. try:
  228. labels = list(labels)
  229. except TypeError: # non-iterable
  230. labels = [labels]
  231. labels = asarray_tuplesafe(labels, dtype=dtype)
  232. return labels
  233. def maybe_make_list(obj):
  234. if obj is not None and not isinstance(obj, (tuple, list)):
  235. return [obj]
  236. return obj
  237. def maybe_iterable_to_list(obj: Iterable[T] | T) -> Collection[T] | T:
  238. """
  239. If obj is Iterable but not list-like, consume into list.
  240. """
  241. if isinstance(obj, abc.Iterable) and not isinstance(obj, abc.Sized):
  242. return list(obj)
  243. obj = cast(Collection, obj)
  244. return obj
  245. def is_null_slice(obj) -> bool:
  246. """
  247. We have a null slice.
  248. """
  249. return (
  250. isinstance(obj, slice)
  251. and obj.start is None
  252. and obj.stop is None
  253. and obj.step is None
  254. )
  255. def is_empty_slice(obj) -> bool:
  256. """
  257. We have an empty slice, e.g. no values are selected.
  258. """
  259. return (
  260. isinstance(obj, slice)
  261. and obj.start is not None
  262. and obj.stop is not None
  263. and obj.start == obj.stop
  264. )
  265. def is_true_slices(line) -> list[bool]:
  266. """
  267. Find non-trivial slices in "line": return a list of booleans with same length.
  268. """
  269. return [isinstance(k, slice) and not is_null_slice(k) for k in line]
  270. # TODO: used only once in indexing; belongs elsewhere?
  271. def is_full_slice(obj, line: int) -> bool:
  272. """
  273. We have a full length slice.
  274. """
  275. return (
  276. isinstance(obj, slice)
  277. and obj.start == 0
  278. and obj.stop == line
  279. and obj.step is None
  280. )
  281. def get_callable_name(obj):
  282. # typical case has name
  283. if hasattr(obj, "__name__"):
  284. return getattr(obj, "__name__")
  285. # some objects don't; could recurse
  286. if isinstance(obj, partial):
  287. return get_callable_name(obj.func)
  288. # fall back to class name
  289. if callable(obj):
  290. return type(obj).__name__
  291. # everything failed (probably because the argument
  292. # wasn't actually callable); we return None
  293. # instead of the empty string in this case to allow
  294. # distinguishing between no name and a name of ''
  295. return None
  296. def apply_if_callable(maybe_callable, obj, **kwargs):
  297. """
  298. Evaluate possibly callable input using obj and kwargs if it is callable,
  299. otherwise return as it is.
  300. Parameters
  301. ----------
  302. maybe_callable : possibly a callable
  303. obj : NDFrame
  304. **kwargs
  305. """
  306. if callable(maybe_callable):
  307. return maybe_callable(obj, **kwargs)
  308. return maybe_callable
  309. def standardize_mapping(into):
  310. """
  311. Helper function to standardize a supplied mapping.
  312. Parameters
  313. ----------
  314. into : instance or subclass of collections.abc.Mapping
  315. Must be a class, an initialized collections.defaultdict,
  316. or an instance of a collections.abc.Mapping subclass.
  317. Returns
  318. -------
  319. mapping : a collections.abc.Mapping subclass or other constructor
  320. a callable object that can accept an iterator to create
  321. the desired Mapping.
  322. See Also
  323. --------
  324. DataFrame.to_dict
  325. Series.to_dict
  326. """
  327. if not inspect.isclass(into):
  328. if isinstance(into, defaultdict):
  329. return partial(defaultdict, into.default_factory)
  330. into = type(into)
  331. if not issubclass(into, abc.Mapping):
  332. raise TypeError(f"unsupported type: {into}")
  333. if into == defaultdict:
  334. raise TypeError("to_dict() only accepts initialized defaultdicts")
  335. return into
  336. @overload
  337. def random_state(state: np.random.Generator) -> np.random.Generator:
  338. ...
  339. @overload
  340. def random_state(
  341. state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
  342. ) -> np.random.RandomState:
  343. ...
  344. def random_state(state: RandomState | None = None):
  345. """
  346. Helper function for processing random_state arguments.
  347. Parameters
  348. ----------
  349. state : int, array-like, BitGenerator, Generator, np.random.RandomState, None.
  350. If receives an int, array-like, or BitGenerator, passes to
  351. np.random.RandomState() as seed.
  352. If receives an np.random RandomState or Generator, just returns that unchanged.
  353. If receives `None`, returns np.random.
  354. If receives anything else, raises an informative ValueError.
  355. .. versionchanged:: 1.1.0
  356. array-like and BitGenerator object now passed to np.random.RandomState()
  357. as seed
  358. Default None.
  359. Returns
  360. -------
  361. np.random.RandomState or np.random.Generator. If state is None, returns np.random
  362. """
  363. if (
  364. is_integer(state)
  365. or is_array_like(state)
  366. or isinstance(state, np.random.BitGenerator)
  367. ):
  368. # error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int,
  369. # Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected
  370. # "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]],
  371. # Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]],
  372. # Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]],
  373. # Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
  374. # integer[Any]]]]]]],
  375. # Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
  376. # integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]],
  377. # Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool,
  378. # int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]],
  379. # BitGenerator]"
  380. return np.random.RandomState(state) # type: ignore[arg-type]
  381. elif isinstance(state, np.random.RandomState):
  382. return state
  383. elif isinstance(state, np.random.Generator):
  384. return state
  385. elif state is None:
  386. return np.random
  387. else:
  388. raise ValueError(
  389. "random_state must be an integer, array-like, a BitGenerator, Generator, "
  390. "a numpy RandomState, or None"
  391. )
  392. def pipe(
  393. obj, func: Callable[..., T] | tuple[Callable[..., T], str], *args, **kwargs
  394. ) -> T:
  395. """
  396. Apply a function ``func`` to object ``obj`` either by passing obj as the
  397. first argument to the function or, in the case that the func is a tuple,
  398. interpret the first element of the tuple as a function and pass the obj to
  399. that function as a keyword argument whose key is the value of the second
  400. element of the tuple.
  401. Parameters
  402. ----------
  403. func : callable or tuple of (callable, str)
  404. Function to apply to this object or, alternatively, a
  405. ``(callable, data_keyword)`` tuple where ``data_keyword`` is a
  406. string indicating the keyword of ``callable`` that expects the
  407. object.
  408. *args : iterable, optional
  409. Positional arguments passed into ``func``.
  410. **kwargs : dict, optional
  411. A dictionary of keyword arguments passed into ``func``.
  412. Returns
  413. -------
  414. object : the return type of ``func``.
  415. """
  416. if isinstance(func, tuple):
  417. func, target = func
  418. if target in kwargs:
  419. msg = f"{target} is both the pipe target and a keyword argument"
  420. raise ValueError(msg)
  421. kwargs[target] = obj
  422. return func(*args, **kwargs)
  423. else:
  424. return func(obj, *args, **kwargs)
  425. def get_rename_function(mapper):
  426. """
  427. Returns a function that will map names/labels, dependent if mapper
  428. is a dict, Series or just a function.
  429. """
  430. def f(x):
  431. if x in mapper:
  432. return mapper[x]
  433. else:
  434. return x
  435. return f if isinstance(mapper, (abc.Mapping, ABCSeries)) else mapper
  436. def convert_to_list_like(
  437. values: Hashable | Iterable | AnyArrayLike,
  438. ) -> list | AnyArrayLike:
  439. """
  440. Convert list-like or scalar input to list-like. List, numpy and pandas array-like
  441. inputs are returned unmodified whereas others are converted to list.
  442. """
  443. if isinstance(values, (list, np.ndarray, ABCIndex, ABCSeries, ABCExtensionArray)):
  444. return values
  445. elif isinstance(values, abc.Iterable) and not isinstance(values, str):
  446. return list(values)
  447. return [values]
  448. @contextlib.contextmanager
  449. def temp_setattr(obj, attr: str, value) -> Generator[None, None, None]:
  450. """Temporarily set attribute on an object.
  451. Args:
  452. obj: Object whose attribute will be modified.
  453. attr: Attribute to modify.
  454. value: Value to temporarily set attribute to.
  455. Yields:
  456. obj with modified attribute.
  457. """
  458. old_value = getattr(obj, attr)
  459. setattr(obj, attr, value)
  460. try:
  461. yield obj
  462. finally:
  463. setattr(obj, attr, old_value)
  464. def require_length_match(data, index: Index) -> None:
  465. """
  466. Check the length of data matches the length of the index.
  467. """
  468. if len(data) != len(index):
  469. raise ValueError(
  470. "Length of values "
  471. f"({len(data)}) "
  472. "does not match length of index "
  473. f"({len(index)})"
  474. )
  475. # the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
  476. # whereas np.min and np.max (which directly call obj.min and obj.max)
  477. # default to axis=None.
  478. _builtin_table = {
  479. builtins.sum: np.sum,
  480. builtins.max: np.maximum.reduce,
  481. builtins.min: np.minimum.reduce,
  482. }
  483. _cython_table = {
  484. builtins.sum: "sum",
  485. builtins.max: "max",
  486. builtins.min: "min",
  487. np.all: "all",
  488. np.any: "any",
  489. np.sum: "sum",
  490. np.nansum: "sum",
  491. np.mean: "mean",
  492. np.nanmean: "mean",
  493. np.prod: "prod",
  494. np.nanprod: "prod",
  495. np.std: "std",
  496. np.nanstd: "std",
  497. np.var: "var",
  498. np.nanvar: "var",
  499. np.median: "median",
  500. np.nanmedian: "median",
  501. np.max: "max",
  502. np.nanmax: "max",
  503. np.min: "min",
  504. np.nanmin: "min",
  505. np.cumprod: "cumprod",
  506. np.nancumprod: "cumprod",
  507. np.cumsum: "cumsum",
  508. np.nancumsum: "cumsum",
  509. }
  510. def get_cython_func(arg: Callable) -> str | None:
  511. """
  512. if we define an internal function for this argument, return it
  513. """
  514. return _cython_table.get(arg)
  515. def is_builtin_func(arg):
  516. """
  517. if we define a builtin function for this argument, return it,
  518. otherwise return the arg
  519. """
  520. return _builtin_table.get(arg, arg)
  521. def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]:
  522. """
  523. If a name is missing then replace it by level_n, where n is the count
  524. .. versionadded:: 1.4.0
  525. Parameters
  526. ----------
  527. names : list-like
  528. list of column names or None values.
  529. Returns
  530. -------
  531. list
  532. list of column names with the None values replaced.
  533. """
  534. return [f"level_{i}" if name is None else name for i, name in enumerate(names)]