123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286 |
- import numpy as np
- import pytest
- from pandas._libs.sparse import IntIndex
- import pandas.util._test_decorators as td
- import pandas as pd
- from pandas import isna
- import pandas._testing as tm
- from pandas.core.arrays.sparse import (
- SparseArray,
- SparseDtype,
- )
class TestConstructors:
    """Tests for building ``SparseArray`` from lists, arrays, indexes and scipy."""

    def test_constructor_dtype(self):
        # Float input with NaN: NaN is the fill value unless one is given.
        with_nan = [np.nan, 1, 2, np.nan]

        sparse = SparseArray(with_nan)
        assert sparse.dtype == SparseDtype(np.float64, np.nan)
        assert sparse.dtype.subtype == np.float64
        assert np.isnan(sparse.fill_value)

        sparse = SparseArray(with_nan, fill_value=0)
        assert sparse.dtype == SparseDtype(np.float64, 0)
        assert sparse.fill_value == 0

        ints = [0, 1, 2, 4]

        # Integer input forced to float64 picks up the NaN fill value.
        sparse = SparseArray(ints, dtype=np.float64)
        assert sparse.dtype == SparseDtype(np.float64, np.nan)
        assert np.isnan(sparse.fill_value)

        # Each of these spellings must give int64 with a fill value of 0.
        int_kwargs = (
            {"dtype": np.int64},
            {"fill_value": 0, "dtype": np.int64},
            {"dtype": None},
            {"fill_value": 0, "dtype": None},
        )
        for kwargs in int_kwargs:
            sparse = SparseArray(ints, **kwargs)
            assert sparse.dtype == SparseDtype(np.int64, 0)
            assert sparse.fill_value == 0

    def test_constructor_dtype_str(self):
        # dtype given as the string "int" matches dtype given as builtin int.
        values = [1, 2, 3]
        from_string = SparseArray(values, dtype="int")
        from_builtin = SparseArray(values, dtype=int)
        tm.assert_sp_array_equal(from_string, from_builtin)

    def test_constructor_sparse_dtype(self):
        # A SparseDtype instance supplies both the subtype and the fill value.
        values = [1, 0, 0, 1]
        result = SparseArray(values, dtype=SparseDtype("int64", -1))
        expected = SparseArray(values, fill_value=-1, dtype=np.int64)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int64")

    def test_constructor_sparse_dtype_str(self):
        # The "Sparse[int32]" string is equivalent to passing np.int32.
        values = [1, 0, 0, 1]
        result = SparseArray(values, dtype="Sparse[int32]")
        expected = SparseArray(values, dtype=np.int32)
        tm.assert_sp_array_equal(result, expected)
        assert result.sp_values.dtype == np.dtype("int32")

    def test_constructor_object_dtype(self):
        # GH#11856
        labels = ["A", "A", np.nan, "B"]

        sparse = SparseArray(labels, dtype=object)
        assert sparse.dtype == SparseDtype(object)
        assert np.isnan(sparse.fill_value)

        sparse = SparseArray(labels, dtype=object, fill_value="A")
        assert sparse.dtype == SparseDtype(object, "A")
        assert sparse.fill_value == "A"

    def test_constructor_object_dtype_bool_fill(self):
        # GH#17574
        data = [False, 0, 100.0, 0.0]
        sparse = SparseArray(data, dtype=object, fill_value=False)
        assert sparse.dtype == SparseDtype(object, False)
        assert sparse.fill_value is False

        dense = np.array(data, dtype=object)
        # Element types are compared too, since False, 0 and 0.0 are all
        # equal under ``==``.
        assert all(
            type(got) is type(want) and got == want
            for got, want in zip(sparse, dense)
        )

    @pytest.mark.parametrize("dtype", [SparseDtype(int, 0), int])
    def test_constructor_na_dtype(self, dtype):
        # NaN in the data cannot be stored under an integer dtype.
        with_nan = [0, 1, np.nan]
        with pytest.raises(ValueError, match="Cannot convert"):
            SparseArray(with_nan, dtype=dtype)

    def test_constructor_warns_when_losing_timezone(self):
        # GH#32501 warn when losing timezone information
        tz_index = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
        expected = SparseArray(np.asarray(tz_index, dtype="datetime64[ns]"))

        # First the index itself, then the same index wrapped in a Series.
        for as_series in (False, True):
            with tm.assert_produces_warning(UserWarning):
                result = SparseArray(pd.Series(tz_index) if as_series else tz_index)

            tm.assert_sp_array_equal(result, expected)

    def test_constructor_spindex_dtype(self):
        sparse = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
        # TODO: actionable?
        # XXX: Behavior change: specifying SparseIndex no longer changes the
        # fill_value
        expected = SparseArray([0, 1, 2, 0], kind="integer")
        tm.assert_sp_array_equal(sparse, expected)
        assert sparse.dtype == SparseDtype(np.int64)
        assert sparse.fill_value == 0

        # (stored values, their positions, dtype passed in,
        #  dense equivalent, kwargs used to build the expected array)
        cases = [
            (
                [1, 2, 3],
                [1, 2, 3],
                np.int64,
                [0, 1, 2, 3],
                {"dtype": np.int64, "fill_value": 0},
            ),
            (
                [1, 2],
                [1, 2],
                np.int64,
                [0, 1, 2, 0],
                {"fill_value": 0, "dtype": np.int64},
            ),
            (
                [1, 2, 3],
                [1, 2, 3],
                None,
                [0, 1, 2, 3],
                {"dtype": None},
            ),
        ]
        for stored, positions, dtype, dense, expected_kwargs in cases:
            sparse = SparseArray(
                data=stored,
                sparse_index=IntIndex(4, positions),
                dtype=dtype,
                fill_value=0,
            )
            expected = SparseArray(dense, **expected_kwargs)
            tm.assert_sp_array_equal(sparse, expected)
            assert sparse.dtype == SparseDtype(np.int64)
            assert sparse.fill_value == 0

    @pytest.mark.parametrize("sparse_index", [None, IntIndex(1, [0])])
    def test_constructor_spindex_dtype_scalar(self, sparse_index):
        # scalar input
        expected = SparseArray([1], dtype=None)

        # Checked once with the parametrized index, then once more with an
        # explicit one-element IntIndex regardless of the parameter.
        for index in (sparse_index, IntIndex(1, [0])):
            sparse = SparseArray(data=1, sparse_index=index, dtype=None)
            tm.assert_sp_array_equal(sparse, expected)
            assert sparse.dtype == SparseDtype(np.int64)
            assert sparse.fill_value == 0

    def test_constructor_spindex_dtype_scalar_broadcasts(self):
        # Two stored values placed at positions 1 and 2 of a length-4 array.
        sparse = SparseArray(
            data=[1, 2],
            sparse_index=IntIndex(4, [1, 2]),
            fill_value=0,
            dtype=None,
        )
        expected = SparseArray([0, 1, 2, 0], fill_value=0, dtype=None)
        tm.assert_sp_array_equal(sparse, expected)
        assert sparse.dtype == SparseDtype(np.int64)
        assert sparse.fill_value == 0

    @pytest.mark.parametrize(
        "data, fill_value",
        [
            (np.array([1, 2]), 0),
            (np.array([1.0, 2.0]), np.nan),
            ([True, False], False),
            ([pd.Timestamp("2017-01-01")], pd.NaT),
        ],
    )
    def test_constructor_inferred_fill_value(self, data, fill_value):
        # With no fill_value given, the one inferred from the data is checked;
        # missing-value fills are compared with isna since NaN != NaN.
        inferred = SparseArray(data).fill_value

        if not isna(fill_value):
            assert inferred == fill_value
        else:
            assert isna(inferred)

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @pytest.mark.parametrize("size", [0, 10])
    @td.skip_if_no_scipy
    def test_from_spmatrix(self, size, format):
        import scipy.sparse

        # Single-column random matrix; densifying the SparseArray must give
        # the same values as densifying the matrix.
        mat = scipy.sparse.random(size, 1, density=0.5, format=format)
        densified = np.asarray(SparseArray.from_spmatrix(mat))
        tm.assert_numpy_array_equal(densified, mat.toarray().ravel())

    @pytest.mark.parametrize("format", ["coo", "csc", "csr"])
    @td.skip_if_no_scipy
    def test_from_spmatrix_including_explicit_zero(self, format):
        import scipy.sparse

        mat = scipy.sparse.random(10, 1, density=0.5, format=format)
        # Overwrite one stored entry with an explicit zero.
        mat.data[0] = 0
        densified = np.asarray(SparseArray.from_spmatrix(mat))
        tm.assert_numpy_array_equal(densified, mat.toarray().ravel())

    @td.skip_if_no_scipy
    def test_from_spmatrix_raises(self):
        import scipy.sparse

        # A matrix with four columns is rejected.
        four_columns = scipy.sparse.eye(5, 4, format="csc")

        with pytest.raises(ValueError, match="not '4'"):
            SparseArray.from_spmatrix(four_columns)

    def test_constructor_from_too_large_array(self):
        # Two-dimensional input is rejected.
        two_dim = np.arange(10).reshape((2, 5))
        with pytest.raises(TypeError, match="expected dimension <= 1 data"):
            SparseArray(two_dim)

    def test_constructor_from_sparse(self):
        # Building from an existing SparseArray keeps its fill value and
        # stored values.
        source = SparseArray([0, 0, 1, 2, 3, 0, 4, 5, 0, 6], fill_value=0)
        rebuilt = SparseArray(source)
        assert rebuilt.fill_value == 0
        tm.assert_almost_equal(rebuilt.sp_values, source.sp_values)

    def test_constructor_copy(self):
        raw = np.array([np.nan, np.nan, 1, 2, 3, np.nan, 4, 5, np.nan, 6])
        source = SparseArray(raw)

        # copy=True: writes to the copy's stored values leave the source alone.
        copied = SparseArray(source, copy=True)
        copied.sp_values[:3] = 0
        assert not (source.sp_values[:3] == 0).any()

        # Default: the stored values are shared, so the write is visible.
        shared = SparseArray(source)
        shared.sp_values[:3] = 0
        assert (source.sp_values[:3] == 0).all()

    def test_constructor_bool(self):
        # GH#10648
        data = np.array([False, False, True, True, False, False])
        sparse = SparseArray(data, fill_value=False, dtype=bool)

        assert sparse.dtype == SparseDtype(bool)
        tm.assert_numpy_array_equal(sparse.sp_values, np.array([True, True]))
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        stored_positions = np.array([2, 3], np.int32)
        tm.assert_numpy_array_equal(sparse.sp_index.indices, stored_positions)

        dense = sparse.to_dense()
        assert dense.dtype == bool
        tm.assert_numpy_array_equal(dense, data)

    def test_constructor_bool_fill_value(self):
        flags = [True, False, True]

        # Inferred or explicit bool dtype: the fill value is falsy.
        for kwargs in ({"dtype": None}, {"dtype": np.bool_}):
            sparse = SparseArray(flags, **kwargs)
            assert sparse.dtype == SparseDtype(np.bool_)
            assert not sparse.fill_value

        # An explicit True fill value is kept.
        sparse = SparseArray(flags, dtype=np.bool_, fill_value=True)
        assert sparse.dtype == SparseDtype(np.bool_, True)
        assert sparse.fill_value

    def test_constructor_float32(self):
        # GH#10648
        data = np.array([1.0, np.nan, 3], dtype=np.float32)
        sparse = SparseArray(data, dtype=np.float32)

        assert sparse.dtype == SparseDtype(np.float32)
        stored_values = np.array([1, 3], dtype=np.float32)
        tm.assert_numpy_array_equal(sparse.sp_values, stored_values)
        # Behavior change: np.asarray densifies.
        # tm.assert_numpy_array_equal(arr.sp_values, np.asarray(arr))
        stored_positions = np.array([0, 2], dtype=np.int32)
        tm.assert_numpy_array_equal(sparse.sp_index.indices, stored_positions)

        dense = sparse.to_dense()
        assert dense.dtype == np.float32
        tm.assert_numpy_array_equal(dense, data)
|