123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238 |
- import numpy as np
- import pytest
- from pandas.core.dtypes.common import is_integer
- import pandas as pd
- from pandas import (
- Index,
- Series,
- )
- import pandas._testing as tm
- from pandas.core.indexes.datetimes import Timestamp
class TestSeriesQuantile:
    """Checks of ``Series.quantile`` for scalar and list-like ``q``.

    Inputs covered here are float, object, datetime64, timedelta64, sparse
    and nullable-integer Series, including empty and all-missing data.
    """

    def test_quantile(self, datetime_series):
        """Scalar quantiles match ``np.percentile``; out-of-range ``q`` raises."""
        valid = datetime_series.dropna()

        for fraction, percent in ((0.1, 10), (0.9, 90)):
            assert datetime_series.quantile(fraction) == np.percentile(valid, percent)

        # object dtype
        as_object = Series(datetime_series, dtype=object)
        assert as_object.quantile(0.9) == np.percentile(valid, 90)

        # datetime64[ns] dtype
        stamps = datetime_series.index.to_series()
        assert stamps.quantile(0.2) == Timestamp("2000-01-10 19:12:00")

        # timedelta64[ns] dtype
        deltas = stamps.diff()
        assert deltas.quantile(0.25) == pd.to_timedelta("24:00:00")

        # GH7661
        nat_total = Series([np.timedelta64("NaT")]).sum()
        assert nat_total == pd.Timedelta(0)

        # every entry of ``q`` has to lie inside [0, 1]
        msg = "percentiles should all be in the interval \\[0, 1\\]"
        for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
            with pytest.raises(ValueError, match=msg):
                datetime_series.quantile(invalid)

    def test_quantile_multi(self, datetime_series):
        """List-like ``q`` returns a Series indexed by the requested quantiles."""
        qs = [0.1, 0.9]
        valid = datetime_series.dropna()
        expected = Series(
            [np.percentile(valid, 10), np.percentile(valid, 90)],
            index=qs,
            name=datetime_series.name,
        )
        tm.assert_series_equal(datetime_series.quantile(qs), expected)

        # duplicated quantiles (passed as a tuple) are kept; the name carries over
        stamps = datetime_series.index.to_series()
        stamps.name = "xxx"
        repeated = Timestamp("2000-01-10 19:12:00")
        expected = Series([repeated, repeated], index=[0.2, 0.2], name="xxx")
        tm.assert_series_equal(stamps.quantile((0.2, 0.2)), expected)

        # an empty ``q`` gives an empty float64 result with a float index
        expected = Series(
            [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
        )
        tm.assert_series_equal(datetime_series.quantile([]), expected)

    def test_quantile_interpolation(self, datetime_series):
        """Passing ``interpolation="linear"`` equals omitting the keyword."""
        # see gh-10174
        reference = np.percentile(datetime_series.dropna(), 10)

        explicit = datetime_series.quantile(0.1, interpolation="linear")
        assert explicit == reference

        implicit = datetime_series.quantile(0.1)
        assert implicit == reference

        # test with and without interpolation keyword
        assert explicit == implicit

    def test_quantile_interpolation_dtype(self):
        """``"lower"`` and ``"higher"`` interpolation return an integer result."""
        # GH #10174
        values = [1, 3, 4]
        median = np.percentile(np.array(values), 50)

        for method in ("lower", "higher"):
            picked = Series(values).quantile(0.5, interpolation=method)
            assert picked == median
            assert is_integer(picked)

    def test_quantile_nan(self):
        """NaN entries are ignored; all-NaN or empty input gives NaN."""
        # GH 13098
        with_nan = Series([1, 2, 3, 4, np.nan])
        assert with_nan.quantile(0.5) == 2.5

        # all nan/empty
        for ser in (Series([], dtype=object), Series([np.nan, np.nan])):
            assert np.isnan(ser.quantile(0.5))

            single = Series([np.nan], index=[0.5])
            tm.assert_series_equal(ser.quantile([0.5]), single)

            pair = Series([np.nan, np.nan], index=[0.2, 0.3])
            tm.assert_series_equal(ser.quantile([0.2, 0.3]), pair)

    @pytest.mark.parametrize(
        "case",
        [
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
            ],
            [pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
            # NaT
            [
                Timestamp("2011-01-01"),
                Timestamp("2011-01-02"),
                Timestamp("2011-01-03"),
                pd.NaT,
            ],
            [
                Timestamp("2011-01-01", tz="US/Eastern"),
                Timestamp("2011-01-02", tz="US/Eastern"),
                Timestamp("2011-01-03", tz="US/Eastern"),
                pd.NaT,
            ],
            [
                pd.Timedelta("1 days"),
                pd.Timedelta("2 days"),
                pd.Timedelta("3 days"),
                pd.NaT,
            ],
        ],
    )
    def test_quantile_box(self, case):
        """The median of three datetime-like values (plus optional NaT) is the middle one."""
        ser = Series(case, name="XXX")
        middle = case[1]

        assert ser.quantile(0.5) == middle

        expected = Series([middle], index=[0.5], name="XXX")
        tm.assert_series_equal(ser.quantile([0.5]), expected)

    def test_datetime_timedelta_quantiles(self):
        """Empty datetime64/timedelta64 Series give a missing value."""
        # covers #9694
        for dtype in ("M8[ns]", "m8[ns]"):
            assert pd.isna(Series([], dtype=dtype).quantile(0.5))

    def test_quantile_nat(self):
        """An all-NaT Series yields NaT for scalar and list-like ``q``."""
        all_nat = Series([pd.NaT, pd.NaT])

        assert all_nat.quantile(0.5) is pd.NaT

        expected = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(all_nat.quantile([0.5]), expected)

    @pytest.mark.parametrize(
        "values, dtype",
        [([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
    )
    def test_quantile_sparse(self, values, dtype):
        """Sparse input gives the dense answer cast to ``Sparse[float]``."""
        sparse = Series(values, dtype=dtype)
        dense = Series(np.asarray(sparse))

        expected = dense.quantile([0.5]).astype("Sparse[float]")
        tm.assert_series_equal(sparse.quantile([0.5]), expected)

    def test_quantile_empty(self):
        """Empty float/int Series give NaN; an empty datetime Series gives NaT."""
        # floats, then int
        for dtype in ("float64", "int64"):
            empty = Series([], dtype=dtype)
            assert np.isnan(empty.quantile(0.5))

            expected = Series([np.nan], index=[0.5])
            tm.assert_series_equal(empty.quantile([0.5]), expected)

        # datetime
        empty = Series([], dtype="datetime64[ns]")
        assert empty.quantile(0.5) is pd.NaT

        expected = Series([pd.NaT], index=[0.5])
        tm.assert_series_equal(empty.quantile([0.5]), expected)

    @pytest.mark.parametrize("dtype", [int, float, "Int64"])
    def test_quantile_dtypes(self, dtype):
        """Quartiles of ``[1, 2, 3]`` are floats; ``Int64`` input gives ``Float64``."""
        fractions = np.arange(0, 1, 0.25)
        expected = Series(np.arange(1, 3, 0.5), index=fractions)
        if dtype == "Int64":
            expected = expected.astype("Float64")

        result = Series([1, 2, 3], dtype=dtype).quantile(fractions)
        tm.assert_series_equal(result, expected)

    def test_quantile_all_na(self, any_int_ea_dtype):
        """All-NA input keeps its dtype and emits no warning."""
        # GH#50681
        qs = [0.1, 0.5]
        ser = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)

        with tm.assert_produces_warning(None):
            result = ser.quantile(qs)

        expected = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype, index=qs)
        tm.assert_series_equal(result, expected)

    def test_quantile_dtype_size(self, any_int_ea_dtype):
        """With one non-NA value the result keeps the input dtype."""
        # GH#50681
        qs = [0.1, 0.5]
        ser = Series([pd.NA, pd.NA, 1], dtype=any_int_ea_dtype)

        expected = Series([1, 1], dtype=any_int_ea_dtype, index=qs)
        tm.assert_series_equal(ser.quantile(qs), expected)
|