test_function.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.core.arrays import FloatingArray
  @pytest.mark.parametrize("ufunc", [np.abs, np.sign])
  # np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
  @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
  def test_ufuncs_single_int(ufunc):
      """Check that a unary ufunc gives an Int64 result for both array and Series.

      The reference value is computed by applying ``ufunc`` to the float view of
      the same data and casting the outcome back to ``Int64``.
      """
      values = pd.array([1, 2, -3, np.nan])

      # Extension-array input.
      array_result = ufunc(values)
      array_expected = pd.array(ufunc(values.astype(float)), dtype="Int64")
      tm.assert_extension_array_equal(array_result, array_expected)

      # Series input wrapping the same array.
      series_result = ufunc(pd.Series(values))
      series_expected = pd.Series(
          pd.array(ufunc(values.astype(float)), dtype="Int64")
      )
      tm.assert_series_equal(series_result, series_expected)
  @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
  def test_ufuncs_single_float(ufunc):
      """Check that a float-valued unary ufunc returns a FloatingArray.

      The expected array is built from ``ufunc`` applied to the float view of
      the data, combined with the mask of the input array.
      """
      values = pd.array([1, 2, -3, np.nan])

      # The data contains -3 and NaN, so "invalid value" warnings are silenced
      # while the ufunc runs.
      with np.errstate(invalid="ignore"):
          array_result = ufunc(values)
          array_expected = FloatingArray(
              ufunc(values.astype(float)), mask=values._mask
          )
      tm.assert_extension_array_equal(array_result, array_expected)

      wrapped = pd.Series(values)
      with np.errstate(invalid="ignore"):
          series_result = ufunc(wrapped)
      tm.assert_series_equal(series_result, pd.Series(array_expected))
  @pytest.mark.parametrize("ufunc", [np.add, np.subtract])
  def test_ufuncs_binary_int(ufunc):
      """Check binary ufuncs on an integer array against a float-based reference.

      Each case pairs the operands handed to ``ufunc`` with the operands used to
      build the reference (the same values, with the array replaced by its
      float view); the reference is cast to ``Int64`` before comparison.
      """
      values = pd.array([1, 2, -3, np.nan])
      ndarr = np.array([1, 2, 3, 4])

      def as_float():
          # Fresh float view for every reference computation.
          return values.astype(float)

      cases = [
          # two IntegerArrays
          ((values, values), lambda: (as_float(), as_float())),
          # IntegerArray with numpy array, both operand orders
          ((values, ndarr), lambda: (as_float(), ndarr)),
          ((ndarr, values), lambda: (ndarr, as_float())),
          # IntegerArray with scalar, both operand orders
          ((values, 1), lambda: (as_float(), 1)),
          ((1, values), lambda: (1, as_float())),
      ]

      for operands, reference_operands in cases:
          result = ufunc(*operands)
          expected = pd.array(ufunc(*reference_operands()), dtype="Int64")
          tm.assert_extension_array_equal(result, expected)
  def test_ufunc_binary_output():
      """Check that ``np.modf`` on a masked array returns a 2-tuple of arrays.

      Each element of the result is compared with ``pd.array`` applied to the
      matching element of ``np.modf`` run on the NaN-filled float ndarray.
      """
      values = pd.array([1, 2, np.nan])
      result = np.modf(values)

      first, second = np.modf(values.to_numpy(na_value=np.nan, dtype="float"))
      expected = (pd.array(first), pd.array(second))

      assert isinstance(result, tuple)
      assert len(result) == 2
      for actual_part, expected_part in zip(result, expected):
          tm.assert_extension_array_equal(actual_part, expected_part)
  @pytest.mark.parametrize("values", [[0, 1], [0, None]])
  def test_ufunc_reduce_raises(values):
      """Check that ``np.add.reduce`` matches ``sum(skipna=False)`` on the array."""
      # NOTE: despite the test name, nothing in this test is expected to raise.
      masked = pd.array(values)
      tm.assert_almost_equal(np.add.reduce(masked), masked.sum(skipna=False))
  @pytest.mark.parametrize(
      "pandasmethname, kwargs",
      [
          ("var", {"ddof": 0}),
          ("var", {"ddof": 1}),
          ("std", {"ddof": 0}),
          ("std", {"ddof": 1}),
          ("kurtosis", {}),
          ("skew", {}),
          ("sem", {}),
      ],
  )
  def test_stat_method(pandasmethname, kwargs):
      """Check that a statistic over Int64 data is unaffected by trailing NaNs.

      The named Series method is called on data ending in two NaN entries and
      on the same data without them; both results must compare equal.
      """
      with_missing = pd.Series(
          data=[1, 2, 3, 4, 5, 6, np.nan, np.nan], dtype="Int64"
      )
      result = getattr(with_missing, pandasmethname)(**kwargs)

      without_missing = pd.Series(data=[1, 2, 3, 4, 5, 6], dtype="Int64")
      expected = getattr(without_missing, pandasmethname)(**kwargs)

      assert expected == result
  def test_value_counts_na():
      """Check ``value_counts`` on an Int64 array with and without the NA bucket."""
      values = pd.array([1, 2, 1, pd.NA], dtype="Int64")

      # dropna=False: the missing value is counted and appears in the index.
      counts_with_na = values.value_counts(dropna=False)
      index_with_na = pd.Index([1, 2, pd.NA], dtype="Int64")
      assert index_with_na.dtype == "Int64"
      tm.assert_series_equal(
          counts_with_na,
          pd.Series([2, 1, 1], index=index_with_na, dtype="Int64", name="count"),
      )

      # dropna=True: only the two distinct non-missing values remain.
      counts_without_na = values.value_counts(dropna=True)
      expected = pd.Series([2, 1], index=values[:2], dtype="Int64", name="count")
      assert expected.index.dtype == values.dtype
      tm.assert_series_equal(counts_without_na, expected)
  def test_value_counts_empty():
      """Check ``value_counts`` on an empty Int64 Series.

      The result must be an empty Int64 Series named "count" whose index has
      the same dtype as the input.
      """
      # https://github.com/pandas-dev/pandas/issues/33317
      empty = pd.Series([], dtype="Int64")
      result = empty.value_counts()

      empty_index = pd.Index([], dtype=empty.dtype)
      assert empty_index.dtype == empty.dtype

      tm.assert_series_equal(
          result,
          pd.Series([], index=empty_index, dtype="Int64", name="count"),
      )
  def test_value_counts_with_normalize():
      """Check ``value_counts(normalize=True)`` on Int64 data containing NA.

      The expected proportions are the counts of the two distinct values
      divided by 3, held as Float64 under the name "proportion".
      """
      # GH 33172
      data = pd.Series([1, 2, 1, pd.NA], dtype="Int64")
      result = data.value_counts(normalize=True)

      raw_counts = pd.Series(
          [2, 1], index=data[:2], dtype="Float64", name="proportion"
      )
      expected = raw_counts / 3
      assert expected.index.dtype == data.dtype
      tm.assert_series_equal(result, expected)
  @pytest.mark.parametrize("skipna", [True, False])
  @pytest.mark.parametrize("min_count", [0, 4])
  def test_integer_array_sum(skipna, min_count, any_int_ea_dtype):
      """Check ``sum`` on ``[1, 2, 3, None]`` for each skipna/min_count combination.

      The total is 6 only when missing values are skipped and ``min_count`` is
      0; every other combination must return ``pd.NA``.
      """
      values = pd.array([1, 2, 3, None], dtype=any_int_ea_dtype)
      total = values.sum(skipna=skipna, min_count=min_count)

      if not skipna or min_count != 0:
          assert total is pd.NA
          return
      assert total == 6
  @pytest.mark.parametrize("skipna", [True, False])
  @pytest.mark.parametrize("method", ["min", "max"])
  def test_integer_array_min_max(skipna, method, any_int_ea_dtype):
      """Check ``min``/``max`` on ``[0, 1, None]`` with and without skipping NA.

      With ``skipna`` the result is 0 for "min" and 1 otherwise; without it the
      result must be ``pd.NA``.
      """
      values = pd.array([0, 1, None], dtype=any_int_ea_dtype)
      result = getattr(values, method)(skipna=skipna)

      if not skipna:
          assert result is pd.NA
          return
      assert result == (1 if method != "min" else 0)
  @pytest.mark.parametrize("skipna", [True, False])
  @pytest.mark.parametrize("min_count", [0, 9])
  def test_integer_array_prod(skipna, min_count, any_int_ea_dtype):
      """Check ``prod`` on ``[1, 2, None]`` for each skipna/min_count combination.

      The product is 2 only when missing values are skipped and ``min_count``
      is 0; every other combination must return ``pd.NA``.
      """
      values = pd.array([1, 2, None], dtype=any_int_ea_dtype)
      product = values.prod(skipna=skipna, min_count=min_count)

      if not skipna or min_count != 0:
          assert product is pd.NA
          return
      assert product == 2
  @pytest.mark.parametrize(
      "values, expected", [([1, 2, 3], 6), ([1, 2, 3, None], 6), ([None], 0)]
  )
  def test_integer_array_numpy_sum(values, expected):
      """Check that ``np.sum`` over an Int64 array equals the expected total."""
      total = np.sum(pd.array(values, dtype="Int64"))
      assert total == expected
  @pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
  def test_dataframe_reductions(op):
      """Check that a DataFrame reduction over an Int64 column stays integral.

      Parameters
      ----------
      op : str
          Name of the DataFrame reduction method to call ("sum", "prod",
          "min" or "max").

      Raises
      ------
      AttributeError
          If ``op`` is not the name of an attribute on the DataFrame.
      """
      # https://github.com/pandas-dev/pandas/pull/32867
      # ensure the integers are not cast to float during reductions
      df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
      # Dispatch on ``op``: the test previously called ``df.max()`` for every
      # parameter, so "sum", "prod" and "min" were never actually exercised.
      result = getattr(df, op)()
      assert isinstance(result["a"], np.int64)
  # TODO(jreback): these still need testing / are broken:
  #   - shift
  #   - set_index (destroys type)