123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365 |
- import operator
- import numpy as np
- import pytest
- import pandas as pd
- import pandas._testing as tm
- from pandas.core import ops
- from pandas.core.arrays import FloatingArray
- # Basic test for the arithmetic array ops
- # -----------------------------------------------------------------------------
@pytest.mark.parametrize(
    "opname, exp",
    [
        ("add", [1, 3, None, None, 9]),
        ("mul", [0, 2, None, None, 20]),
    ],
    ids=["add", "mul"],
)
def test_add_mul(dtype, opname, exp):
    # The forward operator from ``operator`` and the reflected one from
    # ``pandas.core.ops`` are both expected to give the same array, with NA
    # wherever either operand is missing.
    left = pd.array([0, 1, None, 3, 4], dtype=dtype)
    right = pd.array([1, 2, 3, None, 5], dtype=dtype)
    expected = pd.array(exp, dtype=dtype)

    forward = getattr(operator, opname)
    reflected = getattr(ops, f"r{opname}")
    for func in (forward, reflected):
        tm.assert_extension_array_equal(func(left, right), expected)
def test_sub(dtype):
    # Element-wise subtraction; a missing value on either side gives NA.
    minuend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    subtrahend = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([1, 1, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(minuend - subtrahend, expected)
def test_div(dtype):
    # The quotient is expected as Float64, with 1 / 0 giving inf (not NA).
    numerator = pd.array([1, 2, 3, None, 5], dtype=dtype)
    denominator = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([np.inf, 2, None, None, 1.25], dtype="Float64")

    tm.assert_extension_array_equal(numerator / denominator, expected)
@pytest.mark.parametrize(
    "zero, negative",
    [
        (0, False),
        (0.0, False),
        (-0.0, True),
    ],
)
def test_divide_by_zero(zero, negative):
    # https://github.com/pandas-dev/pandas/issues/27398, GH#22793
    # Expected data is nan / +inf / -inf for 0, 1, -1 and the NA slot stays
    # masked; for a negative zero the expected array is multiplied by -1.
    values = np.array([np.nan, np.inf, -np.inf, 1], dtype="float64")
    mask = np.array([False, False, False, True])
    expected = FloatingArray(values, mask)
    if negative:
        expected *= -1

    arr = pd.array([0, 1, -1, None], dtype="Int64")
    tm.assert_extension_array_equal(arr / zero, expected)
def test_floordiv(dtype):
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)

    # Series op sets 1//0 to np.inf, which IntegerArray does not do (yet)
    expected = pd.array([0, 2, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(dividend // divisor, expected)
def test_floordiv_by_int_zero_no_mask(any_int_ea_dtype):
    # GH 48223: Aligns with non-masked floordiv
    # but differs from numpy
    # https://github.com/pandas-dev/pandas/issues/30188#issuecomment-564452740
    masked = pd.Series([0, 1], dtype=any_int_ea_dtype)
    masked_expected = pd.Series([np.inf, 1.0], dtype="Float64")
    tm.assert_series_equal(1 // masked, masked_expected)

    # Repeat the check after casting to the dtype's NumPy equivalent; the
    # expectation is the same values as plain float64.
    plain = masked.astype(masked.dtype.numpy_dtype)
    plain_expected = masked_expected.astype(np.float64)
    tm.assert_series_equal(1 // plain, plain_expected)
def test_mod(dtype):
    # Modulo keeps the input dtype; ``1 % 0`` is expected to be 0 here and a
    # missing value on either side gives NA.
    dividend = pd.array([1, 2, 3, None, 5], dtype=dtype)
    divisor = pd.array([0, 1, None, 3, 4], dtype=dtype)
    expected = pd.array([0, 0, None, None, 1], dtype=dtype)

    tm.assert_extension_array_equal(dividend % divisor, expected)
def test_pow_scalar():
    base = pd.array([-1, 0, 1, None, 2], dtype="Int64")

    # array ** scalar: anything ** 0 is 1, even NA
    tm.assert_extension_array_equal(
        base**0, pd.array([1, 1, 1, 1, 1], dtype="Int64")
    )

    # ** 1 leaves the values (and the NA) as they are
    tm.assert_extension_array_equal(
        base**1, pd.array([-1, 0, 1, None, 2], dtype="Int64")
    )

    # ** NA is NA everywhere except for a base of 1
    tm.assert_extension_array_equal(
        base**pd.NA, pd.array([None, None, 1, None, None], dtype="Int64")
    )

    # ** nan gives an unmasked nan (float result), except 1 ** nan == 1;
    # only the slot that was NA in the input stays masked
    nan_values = np.array([np.nan, np.nan, 1, np.nan, np.nan], dtype="float64")
    nan_mask = np.array([False, False, False, True, False])
    tm.assert_extension_array_equal(base**np.nan, FloatingArray(nan_values, nan_mask))

    # reversed
    exponent = base[1:]  # Can't raise integers to negative powers.

    tm.assert_extension_array_equal(
        0**exponent, pd.array([1, 0, None, 0], dtype="Int64")
    )

    tm.assert_extension_array_equal(
        1**exponent, pd.array([1, 1, 1, 1], dtype="Int64")
    )

    tm.assert_extension_array_equal(
        pd.NA**exponent, pd.array([1, None, None, None], dtype="Int64")
    )

    rnan_values = np.array([1, np.nan, np.nan, np.nan], dtype="float64")
    rnan_mask = np.array([False, False, True, False])
    tm.assert_extension_array_equal(
        np.nan**exponent, FloatingArray(rnan_values, rnan_mask)
    )
def test_pow_array():
    # All nine base/exponent pairings drawn from {0, 1, NA}.
    bases = pd.array([0, 0, 0, 1, 1, 1, None, None, None])
    exponents = pd.array([0, 1, None, 0, 1, None, 0, 1, None])
    expected = pd.array([1, 0, None, 1, 1, 1, 1, None, None])

    tm.assert_extension_array_equal(bases**exponents, expected)
def test_rpow_one_to_na():
    # https://github.com/pandas-dev/pandas/issues/22022
    # https://github.com/pandas-dev/pandas/issues/29997
    # A float ndarray raised to an all-missing Int64 array: 1.0 ** NA is 1.0,
    # any other base gives a missing value.
    exponents = pd.array([np.nan, np.nan], dtype="Int64")
    bases = np.array([1.0, 2.0])
    expected = pd.array([1.0, np.nan], dtype="Float64")

    tm.assert_extension_array_equal(bases**exponents, expected)
@pytest.mark.parametrize("other", [0, 0.5])
def test_numpy_zero_dim_ndarray(other):
    # Adding a zero-dimensional ndarray must match adding the bare scalar.
    arr = pd.array([1, None, 2])
    zero_dim = np.array(other)

    tm.assert_equal(arr + zero_dim, arr + other)
- # Test generic characteristics / errors
- # -----------------------------------------------------------------------------
def test_error_invalid_values(data, all_arithmetic_operators):
    # Arithmetic between a Series built from ``data`` and non-numeric operands
    # (a str, a Timestamp, a Series of str, a Series of datetimes) must raise
    # TypeError. The one accepted combination is multiplication by a Series
    # of str, which is checked against Python's string repetition.
    op = all_arithmetic_operators
    s = pd.Series(data)
    # Bound operator method of the Series. Deliberately not named ``ops`` so
    # the module-level ``pandas.core.ops`` import is not shadowed.
    method = getattr(s, op)

    # invalid scalars
    msg = "|".join(
        [
            r"can only perform ops with numeric values",
            r"IntegerArray cannot perform the operation mod",
            r"unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "ufunc '.*' not supported for the input types, and the inputs could not",
            "ufunc '.*' did not contain a loop with signature matching types",
            "Addition/subtraction of integers and integer-arrays with Timestamp",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        method("foo")
    with pytest.raises(TypeError, match=msg):
        method(pd.Timestamp("20180101"))

    # invalid array-likes
    str_ser = pd.Series("foo", index=s.index)
    if op in ["__mul__", "__rmul__"]:
        # str * int is valid (repetition), so multiplication does not raise
        res = method(str_ser)
        expected = pd.Series(["foo" * x for x in data], index=s.index)
        tm.assert_series_equal(res, expected)
    else:
        with pytest.raises(TypeError, match=msg):
            method(str_ser)

    # datetime-like operand: a different set of acceptable error messages
    msg = "|".join(
        [
            "can only perform ops with numeric values",
            "cannot perform .* with this index type: DatetimeArray",
            "Addition/subtraction of integers and integer-arrays "
            "with DatetimeArray is no longer supported. *",
            "unsupported operand type",
            r"can only concatenate str \(not \"int\"\) to str",
            "not all arguments converted during string",
            "cannot subtract DatetimeArray from ndarray",
        ]
    )
    with pytest.raises(TypeError, match=msg):
        method(pd.Series(pd.date_range("20180101", periods=len(s))))
- # Various
- # -----------------------------------------------------------------------------
- # TODO test unsigned overflow
def test_arith_coerce_scalar(data, all_arithmetic_operators):
    # An operation with a float scalar is compared against the same operation
    # on the float-cast Series, converted to Float64.
    func = tm.get_op_from_name(all_arithmetic_operators)
    ser = pd.Series(data)
    scalar = 0.01

    result = func(ser, scalar)
    expected = func(ser.astype(float), scalar).astype("Float64")

    if all_arithmetic_operators == "__rmod__":
        # rmod results in NaN that wasn't NA in original nullable Series
        # -> unmask it
        zero_positions = (ser == 0).fillna(False).to_numpy(bool)
        expected.array._mask[zero_positions] = False

    tm.assert_series_equal(result, expected)
@pytest.mark.parametrize("other", [1.0, np.array(1.0)])
def test_arithmetic_conversion(all_arithmetic_operators, other):
    # if we have a float operand we should have a float result
    # if that is equal to an integer
    func = tm.get_op_from_name(all_arithmetic_operators)
    ser = pd.Series([1, 2, 3], dtype="Int64")

    outcome = func(ser, other)

    assert outcome.dtype == "Float64"
def test_cross_type_arithmetic():
    columns = {
        "A": pd.Series([1, 2, np.nan], dtype="Int64"),
        "B": pd.Series([1, np.nan, 3], dtype="UInt8"),
        "C": [1, 2, 3],
    }
    frame = pd.DataFrame(columns)

    # Int64 + plain integer column -> Int64, NA propagated
    a_plus_c = frame.A + frame.C
    tm.assert_series_equal(a_plus_c, pd.Series([2, 4, np.nan], dtype="Int64"))

    # a comparison on top of the arithmetic gives a nullable boolean
    is_twelve = (frame.A + frame.C) * 3 == 12
    tm.assert_series_equal(
        is_twelve, pd.Series([False, True, None], dtype="boolean")
    )

    # Int64 + UInt8 -> Int64, NA wherever either side is missing
    a_plus_b = frame.A + frame.B
    tm.assert_series_equal(a_plus_b, pd.Series([2, np.nan, np.nan], dtype="Int64"))
@pytest.mark.parametrize("op", ["mean"])
def test_reduce_to_float(op):
    # some reduce ops always return float, even if the result
    # is a rounded number
    columns = {
        "A": ["a", "b", "b"],
        "B": [1, None, 3],
        "C": pd.array([1, None, 3], dtype="Int64"),
    }
    frame = pd.DataFrame(columns)

    # op
    reduced = getattr(frame.C, op)()
    assert isinstance(reduced, float)

    # groupby
    grouped = getattr(frame.groupby("A"), op)()
    expected_columns = {
        "B": np.array([1.0, 3.0]),
        "C": pd.array([1, 3], dtype="Float64"),
    }
    expected = pd.DataFrame(expected_columns, index=pd.Index(["a", "b"], name="A"))
    tm.assert_frame_equal(grouped, expected)
@pytest.mark.parametrize(
    "source, neg_target, abs_target",
    [
        ([1, 2, 3], [-1, -2, -3], [1, 2, 3]),
        ([1, 2, None], [-1, -2, None], [1, 2, None]),
        ([-1, 0, 1], [1, 0, -1], [1, 0, 1]),
    ],
)
def test_unary_int_operators(any_signed_int_ea_dtype, source, neg_target, abs_target):
    dtype = any_signed_int_ea_dtype
    arr = pd.array(source, dtype=dtype)
    expected_neg = pd.array(neg_target, dtype=dtype)
    expected_abs = pd.array(abs_target, dtype=dtype)

    # unary minus
    tm.assert_extension_array_equal(-arr, expected_neg)

    # unary plus: equal to the input, but must not share memory with it
    positive = +arr
    tm.assert_extension_array_equal(positive, arr)
    assert not tm.shares_memory(positive, arr)

    # absolute value
    tm.assert_extension_array_equal(abs(arr), expected_abs)
def test_values_multiplying_large_series_by_NA():
    # GH#33701
    length = 10001
    zeros = pd.Series(np.zeros(length))
    all_na = pd.Series([pd.NA] * length)

    tm.assert_series_equal(pd.NA * zeros, all_na)
def test_bitwise(dtype):
    left = pd.array([1, None, 3, 4], dtype=dtype)
    right = pd.array([None, 3, 5, 4], dtype=dtype)
    bitwise_funcs = (operator.or_, operator.and_, operator.xor)

    # |, & and ^ in turn: NA wherever either operand is missing, otherwise
    # the plain integer result of the same operator.
    for func in bitwise_funcs:
        expected = pd.array([None, None, func(3, 5), func(4, 4)], dtype=dtype)
        tm.assert_extension_array_equal(func(left, right), expected)

    # TODO: desired behavior when operating with boolean? defer?

    # Bitwise operations against a Float64 array are rejected.
    floats = right.astype("Float64")
    for func in bitwise_funcs:
        with pytest.raises(TypeError, match="unsupported operand type"):
            func(left, floats)
|