tile.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651
  1. """
  2. Quantilization functions and related stuff
  3. """
  4. from __future__ import annotations
  5. from typing import (
  6. Any,
  7. Callable,
  8. Literal,
  9. )
  10. import numpy as np
  11. from pandas._libs import (
  12. Timedelta,
  13. Timestamp,
  14. )
  15. from pandas._libs.lib import infer_dtype
  16. from pandas._typing import IntervalLeftRight
  17. from pandas.core.dtypes.common import (
  18. DT64NS_DTYPE,
  19. ensure_platform_int,
  20. is_bool_dtype,
  21. is_categorical_dtype,
  22. is_datetime64_dtype,
  23. is_datetime64tz_dtype,
  24. is_datetime_or_timedelta_dtype,
  25. is_extension_array_dtype,
  26. is_integer,
  27. is_list_like,
  28. is_numeric_dtype,
  29. is_scalar,
  30. is_timedelta64_dtype,
  31. )
  32. from pandas.core.dtypes.generic import ABCSeries
  33. from pandas.core.dtypes.missing import isna
  34. from pandas import (
  35. Categorical,
  36. Index,
  37. IntervalIndex,
  38. to_datetime,
  39. to_timedelta,
  40. )
  41. from pandas.core import nanops
  42. import pandas.core.algorithms as algos
  43. def cut(
  44. x,
  45. bins,
  46. right: bool = True,
  47. labels=None,
  48. retbins: bool = False,
  49. precision: int = 3,
  50. include_lowest: bool = False,
  51. duplicates: str = "raise",
  52. ordered: bool = True,
  53. ):
  54. """
  55. Bin values into discrete intervals.
  56. Use `cut` when you need to segment and sort data values into bins. This
  57. function is also useful for going from a continuous variable to a
  58. categorical variable. For example, `cut` could convert ages to groups of
  59. age ranges. Supports binning into an equal number of bins, or a
  60. pre-specified array of bins.
  61. Parameters
  62. ----------
  63. x : array-like
  64. The input array to be binned. Must be 1-dimensional.
  65. bins : int, sequence of scalars, or IntervalIndex
  66. The criteria to bin by.
  67. * int : Defines the number of equal-width bins in the range of `x`. The
  68. range of `x` is extended by .1% on each side to include the minimum
  69. and maximum values of `x`.
  70. * sequence of scalars : Defines the bin edges allowing for non-uniform
  71. width. No extension of the range of `x` is done.
  72. * IntervalIndex : Defines the exact bins to be used. Note that
  73. IntervalIndex for `bins` must be non-overlapping.
  74. right : bool, default True
  75. Indicates whether `bins` includes the rightmost edge or not. If
  76. ``right == True`` (the default), then the `bins` ``[1, 2, 3, 4]``
  77. indicate (1,2], (2,3], (3,4]. This argument is ignored when
  78. `bins` is an IntervalIndex.
  79. labels : array or False, default None
  80. Specifies the labels for the returned bins. Must be the same length as
  81. the resulting bins. If False, returns only integer indicators of the
  82. bins. This affects the type of the output container (see below).
  83. This argument is ignored when `bins` is an IntervalIndex. If True,
  84. raises an error. When `ordered=False`, labels must be provided.
  85. retbins : bool, default False
  86. Whether to return the bins or not. Useful when bins is provided
  87. as a scalar.
  88. precision : int, default 3
  89. The precision at which to store and display the bins labels.
  90. include_lowest : bool, default False
  91. Whether the first interval should be left-inclusive or not.
  92. duplicates : {default 'raise', 'drop'}, optional
  93. If bin edges are not unique, raise ValueError or drop non-uniques.
  94. ordered : bool, default True
  95. Whether the labels are ordered or not. Applies to returned types
  96. Categorical and Series (with Categorical dtype). If True,
  97. the resulting categorical will be ordered. If False, the resulting
  98. categorical will be unordered (labels must be provided).
  99. .. versionadded:: 1.1.0
  100. Returns
  101. -------
  102. out : Categorical, Series, or ndarray
  103. An array-like object representing the respective bin for each value
  104. of `x`. The type depends on the value of `labels`.
  105. * None (default) : returns a Series for Series `x` or a
  106. Categorical for all other inputs. The values stored within
  107. are Interval dtype.
  108. * sequence of scalars : returns a Series for Series `x` or a
  109. Categorical for all other inputs. The values stored within
  110. are whatever the type in the sequence is.
  111. * False : returns an ndarray of integers.
  112. bins : numpy.ndarray or IntervalIndex.
  113. The computed or specified bins. Only returned when `retbins=True`.
  114. For scalar or sequence `bins`, this is an ndarray with the computed
  115. bins. If set `duplicates=drop`, `bins` will drop non-unique bin. For
  116. an IntervalIndex `bins`, this is equal to `bins`.
  117. See Also
  118. --------
  119. qcut : Discretize variable into equal-sized buckets based on rank
  120. or based on sample quantiles.
  121. Categorical : Array type for storing data that come from a
  122. fixed set of values.
  123. Series : One-dimensional array with axis labels (including time series).
  124. IntervalIndex : Immutable Index implementing an ordered, sliceable set.
  125. Notes
  126. -----
  127. Any NA values will be NA in the result. Out of bounds values will be NA in
  128. the resulting Series or Categorical object.
  129. Reference :ref:`the user guide <reshaping.tile.cut>` for more examples.
  130. Examples
  131. --------
  132. Discretize into three equal-sized bins.
  133. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
  134. ... # doctest: +ELLIPSIS
  135. [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
  136. Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
  137. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
  138. ... # doctest: +ELLIPSIS
  139. ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
  140. Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
  141. array([0.994, 3. , 5. , 7. ]))
  142. Discovers the same bins, but assign them specific labels. Notice that
  143. the returned Categorical's categories are `labels` and is ordered.
  144. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]),
  145. ... 3, labels=["bad", "medium", "good"])
  146. ['bad', 'good', 'medium', 'medium', 'good', 'bad']
  147. Categories (3, object): ['bad' < 'medium' < 'good']
  148. ``ordered=False`` will result in unordered categories when labels are passed.
  149. This parameter can be used to allow non-unique labels:
  150. >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3,
  151. ... labels=["B", "A", "B"], ordered=False)
  152. ['B', 'B', 'A', 'A', 'B', 'B']
  153. Categories (2, object): ['A', 'B']
  154. ``labels=False`` implies you just want the bins back.
  155. >>> pd.cut([0, 1, 1, 2], bins=4, labels=False)
  156. array([0, 1, 1, 3])
  157. Passing a Series as an input returns a Series with categorical dtype:
  158. >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
  159. ... index=['a', 'b', 'c', 'd', 'e'])
  160. >>> pd.cut(s, 3)
  161. ... # doctest: +ELLIPSIS
  162. a (1.992, 4.667]
  163. b (1.992, 4.667]
  164. c (4.667, 7.333]
  165. d (7.333, 10.0]
  166. e (7.333, 10.0]
  167. dtype: category
  168. Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ...
  169. Passing a Series as an input returns a Series with mapping value.
  170. It is used to map numerically to intervals based on bins.
  171. >>> s = pd.Series(np.array([2, 4, 6, 8, 10]),
  172. ... index=['a', 'b', 'c', 'd', 'e'])
  173. >>> pd.cut(s, [0, 2, 4, 6, 8, 10], labels=False, retbins=True, right=False)
  174. ... # doctest: +ELLIPSIS
  175. (a 1.0
  176. b 2.0
  177. c 3.0
  178. d 4.0
  179. e NaN
  180. dtype: float64,
  181. array([ 0, 2, 4, 6, 8, 10]))
  182. Use `drop` optional when bins is not unique
  183. >>> pd.cut(s, [0, 2, 4, 6, 10, 10], labels=False, retbins=True,
  184. ... right=False, duplicates='drop')
  185. ... # doctest: +ELLIPSIS
  186. (a 1.0
  187. b 2.0
  188. c 3.0
  189. d 3.0
  190. e NaN
  191. dtype: float64,
  192. array([ 0, 2, 4, 6, 10]))
  193. Passing an IntervalIndex for `bins` results in those categories exactly.
  194. Notice that values not covered by the IntervalIndex are set to NaN. 0
  195. is to the left of the first bin (which is closed on the right), and 1.5
  196. falls between two bins.
  197. >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
  198. >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
  199. [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
  200. Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]]
  201. """
  202. # NOTE: this binning code is changed a bit from histogram for var(x) == 0
  203. original = x
  204. x = _preprocess_for_cut(x)
  205. x, dtype = _coerce_to_type(x)
  206. if not np.iterable(bins):
  207. if is_scalar(bins) and bins < 1:
  208. raise ValueError("`bins` should be a positive integer.")
  209. try: # for array-like
  210. sz = x.size
  211. except AttributeError:
  212. x = np.asarray(x)
  213. sz = x.size
  214. if sz == 0:
  215. raise ValueError("Cannot cut empty array")
  216. rng = (nanops.nanmin(x), nanops.nanmax(x))
  217. mn, mx = (mi + 0.0 for mi in rng)
  218. if np.isinf(mn) or np.isinf(mx):
  219. # GH 24314
  220. raise ValueError(
  221. "cannot specify integer `bins` when input data contains infinity"
  222. )
  223. if mn == mx: # adjust end points before binning
  224. mn -= 0.001 * abs(mn) if mn != 0 else 0.001
  225. mx += 0.001 * abs(mx) if mx != 0 else 0.001
  226. bins = np.linspace(mn, mx, bins + 1, endpoint=True)
  227. else: # adjust end points after binning
  228. bins = np.linspace(mn, mx, bins + 1, endpoint=True)
  229. adj = (mx - mn) * 0.001 # 0.1% of the range
  230. if right:
  231. bins[0] -= adj
  232. else:
  233. bins[-1] += adj
  234. elif isinstance(bins, IntervalIndex):
  235. if bins.is_overlapping:
  236. raise ValueError("Overlapping IntervalIndex is not accepted.")
  237. else:
  238. if is_datetime64tz_dtype(bins):
  239. bins = np.asarray(bins, dtype=DT64NS_DTYPE)
  240. else:
  241. bins = np.asarray(bins)
  242. bins = _convert_bin_to_numeric_type(bins, dtype)
  243. # GH 26045: cast to float64 to avoid an overflow
  244. if (np.diff(bins.astype("float64")) < 0).any():
  245. raise ValueError("bins must increase monotonically.")
  246. fac, bins = _bins_to_cuts(
  247. x,
  248. bins,
  249. right=right,
  250. labels=labels,
  251. precision=precision,
  252. include_lowest=include_lowest,
  253. dtype=dtype,
  254. duplicates=duplicates,
  255. ordered=ordered,
  256. )
  257. return _postprocess_for_cut(fac, bins, retbins, dtype, original)
  258. def qcut(
  259. x,
  260. q,
  261. labels=None,
  262. retbins: bool = False,
  263. precision: int = 3,
  264. duplicates: str = "raise",
  265. ):
  266. """
  267. Quantile-based discretization function.
  268. Discretize variable into equal-sized buckets based on rank or based
  269. on sample quantiles. For example 1000 values for 10 quantiles would
  270. produce a Categorical object indicating quantile membership for each data point.
  271. Parameters
  272. ----------
  273. x : 1d ndarray or Series
  274. q : int or list-like of float
  275. Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately
  276. array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles.
  277. labels : array or False, default None
  278. Used as labels for the resulting bins. Must be of the same length as
  279. the resulting bins. If False, return only integer indicators of the
  280. bins. If True, raises an error.
  281. retbins : bool, optional
  282. Whether to return the (bins, labels) or not. Can be useful if bins
  283. is given as a scalar.
  284. precision : int, optional
  285. The precision at which to store and display the bins labels.
  286. duplicates : {default 'raise', 'drop'}, optional
  287. If bin edges are not unique, raise ValueError or drop non-uniques.
  288. Returns
  289. -------
  290. out : Categorical or Series or array of integers if labels is False
  291. The return type (Categorical or Series) depends on the input: a Series
  292. of type category if input is a Series else Categorical. Bins are
  293. represented as categories when categorical data is returned.
  294. bins : ndarray of floats
  295. Returned only if `retbins` is True.
  296. Notes
  297. -----
  298. Out of bounds values will be NA in the resulting Categorical object
  299. Examples
  300. --------
  301. >>> pd.qcut(range(5), 4)
  302. ... # doctest: +ELLIPSIS
  303. [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
  304. Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ...
  305. >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
  306. ... # doctest: +SKIP
  307. [good, good, medium, bad, bad]
  308. Categories (3, object): [good < medium < bad]
  309. >>> pd.qcut(range(5), 4, labels=False)
  310. array([0, 0, 1, 2, 3])
  311. """
  312. original = x
  313. x = _preprocess_for_cut(x)
  314. x, dtype = _coerce_to_type(x)
  315. quantiles = np.linspace(0, 1, q + 1) if is_integer(q) else q
  316. x_np = np.asarray(x)
  317. x_np = x_np[~np.isnan(x_np)]
  318. bins = np.quantile(x_np, quantiles)
  319. fac, bins = _bins_to_cuts(
  320. x,
  321. bins,
  322. labels=labels,
  323. precision=precision,
  324. include_lowest=True,
  325. dtype=dtype,
  326. duplicates=duplicates,
  327. )
  328. return _postprocess_for_cut(fac, bins, retbins, dtype, original)
  329. def _bins_to_cuts(
  330. x,
  331. bins: np.ndarray,
  332. right: bool = True,
  333. labels=None,
  334. precision: int = 3,
  335. include_lowest: bool = False,
  336. dtype=None,
  337. duplicates: str = "raise",
  338. ordered: bool = True,
  339. ):
  340. if not ordered and labels is None:
  341. raise ValueError("'labels' must be provided if 'ordered = False'")
  342. if duplicates not in ["raise", "drop"]:
  343. raise ValueError(
  344. "invalid value for 'duplicates' parameter, valid options are: raise, drop"
  345. )
  346. if isinstance(bins, IntervalIndex):
  347. # we have a fast-path here
  348. ids = bins.get_indexer(x)
  349. result = Categorical.from_codes(ids, categories=bins, ordered=True)
  350. return result, bins
  351. unique_bins = algos.unique(bins)
  352. if len(unique_bins) < len(bins) and len(bins) != 2:
  353. if duplicates == "raise":
  354. raise ValueError(
  355. f"Bin edges must be unique: {repr(bins)}.\n"
  356. f"You can drop duplicate edges by setting the 'duplicates' kwarg"
  357. )
  358. bins = unique_bins
  359. side: Literal["left", "right"] = "left" if right else "right"
  360. ids = ensure_platform_int(bins.searchsorted(x, side=side))
  361. if include_lowest:
  362. ids[np.asarray(x) == bins[0]] = 1
  363. na_mask = isna(x) | (ids == len(bins)) | (ids == 0)
  364. has_nas = na_mask.any()
  365. if labels is not False:
  366. if not (labels is None or is_list_like(labels)):
  367. raise ValueError(
  368. "Bin labels must either be False, None or passed in as a "
  369. "list-like argument"
  370. )
  371. if labels is None:
  372. labels = _format_labels(
  373. bins, precision, right=right, include_lowest=include_lowest, dtype=dtype
  374. )
  375. elif ordered and len(set(labels)) != len(labels):
  376. raise ValueError(
  377. "labels must be unique if ordered=True; pass ordered=False "
  378. "for duplicate labels"
  379. )
  380. else:
  381. if len(labels) != len(bins) - 1:
  382. raise ValueError(
  383. "Bin labels must be one fewer than the number of bin edges"
  384. )
  385. if not is_categorical_dtype(labels):
  386. labels = Categorical(
  387. labels,
  388. categories=labels if len(set(labels)) == len(labels) else None,
  389. ordered=ordered,
  390. )
  391. # TODO: handle mismatch between categorical label order and pandas.cut order.
  392. np.putmask(ids, na_mask, 0)
  393. result = algos.take_nd(labels, ids - 1)
  394. else:
  395. result = ids - 1
  396. if has_nas:
  397. result = result.astype(np.float64)
  398. np.putmask(result, na_mask, np.nan)
  399. return result, bins
  400. def _coerce_to_type(x):
  401. """
  402. if the passed data is of datetime/timedelta, bool or nullable int type,
  403. this method converts it to numeric so that cut or qcut method can
  404. handle it
  405. """
  406. dtype = None
  407. if is_datetime64tz_dtype(x.dtype):
  408. dtype = x.dtype
  409. elif is_datetime64_dtype(x.dtype):
  410. x = to_datetime(x).astype("datetime64[ns]", copy=False)
  411. dtype = np.dtype("datetime64[ns]")
  412. elif is_timedelta64_dtype(x.dtype):
  413. x = to_timedelta(x)
  414. dtype = np.dtype("timedelta64[ns]")
  415. elif is_bool_dtype(x.dtype):
  416. # GH 20303
  417. x = x.astype(np.int64)
  418. # To support cut and qcut for IntegerArray we convert to float dtype.
  419. # Will properly support in the future.
  420. # https://github.com/pandas-dev/pandas/pull/31290
  421. # https://github.com/pandas-dev/pandas/issues/31389
  422. elif is_extension_array_dtype(x.dtype) and is_numeric_dtype(x.dtype):
  423. x = x.to_numpy(dtype=np.float64, na_value=np.nan)
  424. if dtype is not None:
  425. # GH 19768: force NaT to NaN during integer conversion
  426. x = np.where(x.notna(), x.view(np.int64), np.nan)
  427. return x, dtype
  428. def _convert_bin_to_numeric_type(bins, dtype):
  429. """
  430. if the passed bin is of datetime/timedelta type,
  431. this method converts it to integer
  432. Parameters
  433. ----------
  434. bins : list-like of bins
  435. dtype : dtype of data
  436. Raises
  437. ------
  438. ValueError if bins are not of a compat dtype to dtype
  439. """
  440. bins_dtype = infer_dtype(bins, skipna=False)
  441. if is_timedelta64_dtype(dtype):
  442. if bins_dtype in ["timedelta", "timedelta64"]:
  443. bins = to_timedelta(bins).view(np.int64)
  444. else:
  445. raise ValueError("bins must be of timedelta64 dtype")
  446. elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
  447. if bins_dtype in ["datetime", "datetime64"]:
  448. bins = to_datetime(bins)
  449. if is_datetime64_dtype(bins):
  450. # As of 2.0, to_datetime may give non-nano, so we need to convert
  451. # here until the rest of this file recognizes non-nano
  452. bins = bins.astype("datetime64[ns]", copy=False)
  453. bins = bins.view(np.int64)
  454. else:
  455. raise ValueError("bins must be of datetime64 dtype")
  456. return bins
  457. def _convert_bin_to_datelike_type(bins, dtype):
  458. """
  459. Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype is
  460. datelike
  461. Parameters
  462. ----------
  463. bins : list-like of bins
  464. dtype : dtype of data
  465. Returns
  466. -------
  467. bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is
  468. datelike
  469. """
  470. if is_datetime64tz_dtype(dtype):
  471. bins = to_datetime(bins.astype(np.int64), utc=True).tz_convert(dtype.tz)
  472. elif is_datetime_or_timedelta_dtype(dtype):
  473. bins = Index(bins.astype(np.int64), dtype=dtype)
  474. return bins
  475. def _format_labels(
  476. bins, precision: int, right: bool = True, include_lowest: bool = False, dtype=None
  477. ):
  478. """based on the dtype, return our labels"""
  479. closed: IntervalLeftRight = "right" if right else "left"
  480. formatter: Callable[[Any], Timestamp] | Callable[[Any], Timedelta]
  481. if is_datetime64tz_dtype(dtype):
  482. formatter = lambda x: Timestamp(x, tz=dtype.tz)
  483. adjust = lambda x: x - Timedelta("1ns")
  484. elif is_datetime64_dtype(dtype):
  485. formatter = Timestamp
  486. adjust = lambda x: x - Timedelta("1ns")
  487. elif is_timedelta64_dtype(dtype):
  488. formatter = Timedelta
  489. adjust = lambda x: x - Timedelta("1ns")
  490. else:
  491. precision = _infer_precision(precision, bins)
  492. formatter = lambda x: _round_frac(x, precision)
  493. adjust = lambda x: x - 10 ** (-precision)
  494. breaks = [formatter(b) for b in bins]
  495. if right and include_lowest:
  496. # adjust lhs of first interval by precision to account for being right closed
  497. breaks[0] = adjust(breaks[0])
  498. return IntervalIndex.from_breaks(breaks, closed=closed)
  499. def _preprocess_for_cut(x):
  500. """
  501. handles preprocessing for cut where we convert passed
  502. input to array, strip the index information and store it
  503. separately
  504. """
  505. # Check that the passed array is a Pandas or Numpy object
  506. # We don't want to strip away a Pandas data-type here (e.g. datetimetz)
  507. ndim = getattr(x, "ndim", None)
  508. if ndim is None:
  509. x = np.asarray(x)
  510. if x.ndim != 1:
  511. raise ValueError("Input array must be 1 dimensional")
  512. return x
  513. def _postprocess_for_cut(fac, bins, retbins: bool, dtype, original):
  514. """
  515. handles post processing for the cut method where
  516. we combine the index information if the originally passed
  517. datatype was a series
  518. """
  519. if isinstance(original, ABCSeries):
  520. fac = original._constructor(fac, index=original.index, name=original.name)
  521. if not retbins:
  522. return fac
  523. bins = _convert_bin_to_datelike_type(bins, dtype)
  524. return fac, bins
  525. def _round_frac(x, precision: int):
  526. """
  527. Round the fractional part of the given number
  528. """
  529. if not np.isfinite(x) or x == 0:
  530. return x
  531. else:
  532. frac, whole = np.modf(x)
  533. if whole == 0:
  534. digits = -int(np.floor(np.log10(abs(frac)))) - 1 + precision
  535. else:
  536. digits = precision
  537. return np.around(x, digits)
  538. def _infer_precision(base_precision: int, bins) -> int:
  539. """
  540. Infer an appropriate precision for _round_frac
  541. """
  542. for precision in range(base_precision, 20):
  543. levels = [_round_frac(b, precision) for b in bins]
  544. if algos.unique(levels).size == bins.size:
  545. return precision
  546. return base_precision # default