123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308 |
- import numpy as np
- import pytest
- from pandas import (
- NaT,
- Timestamp,
- isna,
- )
- from pandas.core.arrays.sparse import (
- SparseArray,
- SparseDtype,
- )
class TestReductions:
    """Tests for ``SparseArray`` reductions (all/any/sum/mean).

    Each reduction is exercised both through the array method and through
    the matching NumPy function (``np.all``, ``np.any``, ``np.sum``,
    ``np.mean``).

    The ``all``/``any`` tests never mutate the parametrized ``data`` list:
    pytest hands the very same list object to every run of a given case, so
    an in-place edit would leak into any rerun of that case.
    """

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_all(self, data, pos, neg):
        """``all()`` holds for truthy data and fails once one entry is ``neg``."""
        # GH#17570
        truthy = list(data)
        assert SparseArray(truthy).all()
        assert SparseArray(truthy, fill_value=pos).all()

        # Copy before editing so the shared parametrized list stays intact.
        with_falsy = list(data)
        with_falsy[1] = neg
        assert not SparseArray(with_falsy).all()
        assert not SparseArray(with_falsy, fill_value=pos).all()

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([True, True, True], True, False),
            ([1, 2, 1], 1, 0),
            ([1.0, 2.0, 1.0], 1.0, 0.0),
        ],
    )
    def test_numpy_all(self, data, pos, neg):
        """``np.all`` mirrors ``SparseArray.all`` and rejects ``out``."""
        # GH#17570
        truthy = list(data)
        assert np.all(SparseArray(truthy))
        assert np.all(SparseArray(truthy, fill_value=pos))

        # Copy before editing so the shared parametrized list stays intact.
        with_falsy = list(data)
        with_falsy[1] = neg
        assert not np.all(SparseArray(with_falsy))
        assert not np.all(SparseArray(with_falsy, fill_value=pos))

        # NumPy's ``out`` argument must be refused with a ValueError.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.all(SparseArray(with_falsy), out=np.array([]))

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_any(self, data, pos, neg):
        """``any()`` holds with one truthy entry and fails once it is ``neg``."""
        # GH#17570
        one_truthy = list(data)
        assert SparseArray(one_truthy).any()
        assert SparseArray(one_truthy, fill_value=pos).any()

        # Copy before editing so the shared parametrized list stays intact.
        all_falsy = list(data)
        all_falsy[1] = neg
        assert not SparseArray(all_falsy).any()
        assert not SparseArray(all_falsy, fill_value=pos).any()

    @pytest.mark.parametrize(
        "data,pos,neg",
        [
            ([False, True, False], True, False),
            ([0, 2, 0], 2, 0),
            ([0.0, 2.0, 0.0], 2.0, 0.0),
        ],
    )
    def test_numpy_any(self, data, pos, neg):
        """``np.any`` mirrors ``SparseArray.any`` and rejects ``out``."""
        # GH#17570
        one_truthy = list(data)
        assert np.any(SparseArray(one_truthy))
        assert np.any(SparseArray(one_truthy, fill_value=pos))

        # Copy before editing so the shared parametrized list stays intact.
        all_falsy = list(data)
        all_falsy[1] = neg
        assert not np.any(SparseArray(all_falsy))
        out = np.any(SparseArray(all_falsy, fill_value=pos))
        assert not out

        # The previous result is reused as the (unsupported) ``out`` argument.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.any(SparseArray(all_falsy), out=out)

    def test_sum(self):
        """``sum()`` adds 0..9 to 45 and drops a NaN entry for either fill value."""
        data = np.arange(10).astype(float)
        assert SparseArray(data).sum() == 45.0

        # Replacing the 5 with NaN removes it from the total: 45 - 5 == 40.
        data[5] = np.nan
        assert SparseArray(data, fill_value=2).sum() == 40.0
        assert SparseArray(data, fill_value=np.nan).sum() == 40.0

    @pytest.mark.parametrize(
        "arr",
        [np.array([0, 1, np.nan, 1]), np.array([0, 1, 1])],
    )
    @pytest.mark.parametrize("fill_value", [0, 1, np.nan])
    @pytest.mark.parametrize("min_count, expected", [(3, 2), (4, np.nan)])
    def test_sum_min_count(self, arr, fill_value, min_count, expected):
        """``sum(min_count=...)`` gives 2 at ``min_count=3`` and NaN at 4.

        Both input arrays hold exactly three non-NaN values.
        """
        # GH#25777
        sparray = SparseArray(arr, fill_value=fill_value)
        result = sparray.sum(min_count=min_count)
        if np.isnan(expected):
            assert np.isnan(result)
        else:
            assert result == expected

    def test_bool_sum_min_count(self):
        """Boolean ``sum(min_count=...)``: 5 at ``min_count=1``, NA at 11.

        The array has ten entries, five of them ``True``.
        """
        spar_bool = SparseArray([False, True] * 5, dtype=np.bool_, fill_value=True)
        assert spar_bool.sum(min_count=1) == 5
        assert isna(spar_bool.sum(min_count=11))

    def test_numpy_sum(self):
        """``np.sum`` mirrors ``SparseArray.sum`` and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        assert np.sum(SparseArray(data)) == 45.0

        data[5] = np.nan
        assert np.sum(SparseArray(data, fill_value=2)) == 40.0
        out = np.sum(SparseArray(data, fill_value=np.nan))
        assert out == 40.0

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), dtype=np.int64)

        # The previous result is reused as the (unsupported) ``out`` argument.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.sum(SparseArray(data), out=out)

    def test_mean(self):
        """``mean()`` is 4.5 for 0..9 and 40/9 once the 5 becomes NaN."""
        data = np.arange(10).astype(float)
        assert SparseArray(data).mean() == 4.5

        data[5] = np.nan
        assert SparseArray(data).mean() == 40.0 / 9

    def test_numpy_mean(self):
        """``np.mean`` mirrors ``SparseArray.mean`` and rejects ``dtype``/``out``."""
        data = np.arange(10).astype(float)
        assert np.mean(SparseArray(data)) == 4.5

        data[5] = np.nan
        out = np.mean(SparseArray(data))
        assert out == 40.0 / 9

        msg = "the 'dtype' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), dtype=np.int64)

        # The previous result is reused as the (unsupported) ``out`` argument.
        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.mean(SparseArray(data), out=out)
class TestMinMax:
    """Tests for ``SparseArray.min`` and ``SparseArray.max``."""

    @pytest.mark.parametrize(
        "raw_data,max_expected,min_expected",
        [
            (np.arange(5.0), [4], [0]),
            (-np.arange(5.0), [0], [-4]),
            (np.array([0, 1, 2, np.nan, 4]), [4], [0]),
            (np.array([np.nan] * 5), [np.nan], [np.nan]),
            (np.array([]), [np.nan], [np.nan]),
        ],
    )
    def test_nan_fill_value(self, raw_data, max_expected, min_expected):
        """Min/max of an array built without an explicit fill value."""
        sparse = SparseArray(raw_data)

        # First pass: ``skipna`` left at its default.
        highest, lowest = sparse.max(), sparse.min()
        assert highest in max_expected
        assert lowest in min_expected

        # Second pass: with ``skipna=False`` any NaN in the input must
        # surface as a NaN result; otherwise the expectations are unchanged.
        highest, lowest = sparse.max(skipna=False), sparse.min(skipna=False)
        if not np.isnan(raw_data).any():
            assert highest in max_expected
            assert lowest in min_expected
        else:
            assert np.isnan(highest)
            assert np.isnan(lowest)

    @pytest.mark.parametrize(
        "fill_value,max_expected,min_expected",
        [
            (100, 100, 0),
            (-100, 1, -100),
        ],
    )
    def test_fill_value(self, fill_value, max_expected, min_expected):
        """The fill value itself takes part in min/max of an int array."""
        values = np.array([fill_value, 0, 1])
        sparse = SparseArray(values, dtype=SparseDtype("int", fill_value))

        assert sparse.max() == max_expected
        assert sparse.min() == min_expected

    def test_only_fill_value(self):
        """An array made solely of the fill value reduces to that value."""
        fill = 100
        sparse = SparseArray(np.array([fill] * 3), dtype=SparseDtype("int", fill))

        # Nothing is stored explicitly: every entry equals the fill value.
        assert len(sparse._valid_sp_values) == 0

        # Same expectation with default arguments and with ``skipna=False``.
        for kwargs in ({}, {"skipna": False}):
            assert sparse.max(**kwargs) == fill
            assert sparse.min(**kwargs) == fill

    @pytest.mark.parametrize("func", ["min", "max"])
    @pytest.mark.parametrize("data", [np.array([]), np.array([np.nan, np.nan])])
    @pytest.mark.parametrize(
        "dtype,expected",
        [
            (SparseDtype(np.float64, np.nan), np.nan),
            (SparseDtype(np.float64, 5.0), np.nan),
            (SparseDtype("datetime64[ns]", NaT), NaT),
            (SparseDtype("datetime64[ns]", Timestamp("2018-05-05")), NaT),
        ],
    )
    def test_na_value_if_no_valid_values(self, func, data, dtype, expected):
        """Empty or all-NaN input reduces to the NA value expected for the dtype."""
        reduction = getattr(SparseArray(data, dtype=dtype), func)
        result = reduction()

        if expected is not NaT:
            assert np.isnan(result)
        else:
            # TODO: pin down whether we wrap datetime64("NaT")
            assert result is NaT or np.isnat(result)
class TestArgmaxArgmin:
    """Tests for ``SparseArray.argmax`` and ``SparseArray.argmin``."""

    @pytest.mark.parametrize(
        "arr,argmax_expected,argmin_expected",
        [
            (SparseArray([1, 2, 0, 1, 2]), 1, 2),
            (SparseArray([-1, -2, 0, -1, -2]), 2, 1),
            (SparseArray([np.nan, 1, 0, 0, np.nan, -1]), 1, 5),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2]), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=-1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=0), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=1), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=2), 5, 2),
            (SparseArray([np.nan, 1, 0, 0, np.nan, 2], fill_value=3), 5, 2),
            (SparseArray([0] * 10 + [-1], fill_value=0), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=-1), 0, 10),
            (SparseArray([0] * 10 + [-1], fill_value=1), 0, 10),
            (SparseArray([-1] + [0] * 10, fill_value=0), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=0), 0, 1),
            (SparseArray([-1] + [0] * 10, fill_value=-1), 1, 0),
            (SparseArray([1] + [0] * 10, fill_value=1), 0, 1),
        ],
    )
    def test_argmax_argmin(self, arr, argmax_expected, argmin_expected):
        """Positions of the extreme values match the expected indices."""
        found_max, found_min = arr.argmax(), arr.argmin()

        assert found_max == argmax_expected
        assert found_min == argmin_expected

    @pytest.mark.parametrize(
        "arr,method",
        [(SparseArray([]), "argmax"), (SparseArray([]), "argmin")],
    )
    def test_empty_array(self, arr, method):
        """Calling argmax/argmin on an empty array raises ``ValueError``."""
        msg = f"attempt to get {method} of an empty sequence"

        # Choose the bound method up front; only the call is expected to raise.
        reducer = arr.argmax if method == "argmax" else arr.argmin
        with pytest.raises(ValueError, match=msg):
            reducer()
|