# test_function.py: tests of NumPy ufuncs, reductions and value_counts on pandas "Float64" data.
import numpy as np
import pytest

from pandas.compat import IS64

import pandas as pd
import pandas._testing as tm
  6. @pytest.mark.parametrize("ufunc", [np.abs, np.sign])
  7. # np.sign emits a warning with nans, <https://github.com/numpy/numpy/issues/15127>
  8. @pytest.mark.filterwarnings("ignore:invalid value encountered in sign:RuntimeWarning")
  9. def test_ufuncs_single(ufunc):
  10. a = pd.array([1, 2, -3, np.nan], dtype="Float64")
  11. result = ufunc(a)
  12. expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
  13. tm.assert_extension_array_equal(result, expected)
  14. s = pd.Series(a)
  15. result = ufunc(s)
  16. expected = pd.Series(expected)
  17. tm.assert_series_equal(result, expected)
  18. @pytest.mark.parametrize("ufunc", [np.log, np.exp, np.sin, np.cos, np.sqrt])
  19. def test_ufuncs_single_float(ufunc):
  20. a = pd.array([1.0, 0.2, 3.0, np.nan], dtype="Float64")
  21. with np.errstate(invalid="ignore"):
  22. result = ufunc(a)
  23. expected = pd.array(ufunc(a.astype(float)), dtype="Float64")
  24. tm.assert_extension_array_equal(result, expected)
  25. s = pd.Series(a)
  26. with np.errstate(invalid="ignore"):
  27. result = ufunc(s)
  28. expected = pd.Series(ufunc(s.astype(float)), dtype="Float64")
  29. tm.assert_series_equal(result, expected)
  30. @pytest.mark.parametrize("ufunc", [np.add, np.subtract])
  31. def test_ufuncs_binary_float(ufunc):
  32. # two FloatingArrays
  33. a = pd.array([1, 0.2, -3, np.nan], dtype="Float64")
  34. result = ufunc(a, a)
  35. expected = pd.array(ufunc(a.astype(float), a.astype(float)), dtype="Float64")
  36. tm.assert_extension_array_equal(result, expected)
  37. # FloatingArray with numpy array
  38. arr = np.array([1, 2, 3, 4])
  39. result = ufunc(a, arr)
  40. expected = pd.array(ufunc(a.astype(float), arr), dtype="Float64")
  41. tm.assert_extension_array_equal(result, expected)
  42. result = ufunc(arr, a)
  43. expected = pd.array(ufunc(arr, a.astype(float)), dtype="Float64")
  44. tm.assert_extension_array_equal(result, expected)
  45. # FloatingArray with scalar
  46. result = ufunc(a, 1)
  47. expected = pd.array(ufunc(a.astype(float), 1), dtype="Float64")
  48. tm.assert_extension_array_equal(result, expected)
  49. result = ufunc(1, a)
  50. expected = pd.array(ufunc(1, a.astype(float)), dtype="Float64")
  51. tm.assert_extension_array_equal(result, expected)
  52. @pytest.mark.parametrize("values", [[0, 1], [0, None]])
  53. def test_ufunc_reduce_raises(values):
  54. arr = pd.array(values, dtype="Float64")
  55. res = np.add.reduce(arr)
  56. expected = arr.sum(skipna=False)
  57. tm.assert_almost_equal(res, expected)
  58. @pytest.mark.skipif(not IS64, reason="GH 36579: fail on 32-bit system")
  59. @pytest.mark.parametrize(
  60. "pandasmethname, kwargs",
  61. [
  62. ("var", {"ddof": 0}),
  63. ("var", {"ddof": 1}),
  64. ("std", {"ddof": 0}),
  65. ("std", {"ddof": 1}),
  66. ("kurtosis", {}),
  67. ("skew", {}),
  68. ("sem", {}),
  69. ],
  70. )
  71. def test_stat_method(pandasmethname, kwargs):
  72. s = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, np.nan, np.nan], dtype="Float64")
  73. pandasmeth = getattr(s, pandasmethname)
  74. result = pandasmeth(**kwargs)
  75. s2 = pd.Series(data=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6], dtype="float64")
  76. pandasmeth = getattr(s2, pandasmethname)
  77. expected = pandasmeth(**kwargs)
  78. assert expected == result
  79. def test_value_counts_na():
  80. arr = pd.array([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
  81. result = arr.value_counts(dropna=False)
  82. idx = pd.Index([0.1, 0.2, pd.NA], dtype=arr.dtype)
  83. assert idx.dtype == arr.dtype
  84. expected = pd.Series([2, 1, 1], index=idx, dtype="Int64", name="count")
  85. tm.assert_series_equal(result, expected)
  86. result = arr.value_counts(dropna=True)
  87. expected = pd.Series([2, 1], index=idx[:-1], dtype="Int64", name="count")
  88. tm.assert_series_equal(result, expected)
  89. def test_value_counts_empty():
  90. ser = pd.Series([], dtype="Float64")
  91. result = ser.value_counts()
  92. idx = pd.Index([], dtype="Float64")
  93. assert idx.dtype == "Float64"
  94. expected = pd.Series([], index=idx, dtype="Int64", name="count")
  95. tm.assert_series_equal(result, expected)
  96. def test_value_counts_with_normalize():
  97. ser = pd.Series([0.1, 0.2, 0.1, pd.NA], dtype="Float64")
  98. result = ser.value_counts(normalize=True)
  99. expected = pd.Series([2, 1], index=ser[:2], dtype="Float64", name="proportion") / 3
  100. assert expected.index.dtype == ser.dtype
  101. tm.assert_series_equal(result, expected)
  102. @pytest.mark.parametrize("skipna", [True, False])
  103. @pytest.mark.parametrize("min_count", [0, 4])
  104. def test_floating_array_sum(skipna, min_count, dtype):
  105. arr = pd.array([1, 2, 3, None], dtype=dtype)
  106. result = arr.sum(skipna=skipna, min_count=min_count)
  107. if skipna and min_count == 0:
  108. assert result == 6.0
  109. else:
  110. assert result is pd.NA
  111. @pytest.mark.parametrize(
  112. "values, expected", [([1, 2, 3], 6.0), ([1, 2, 3, None], 6.0), ([None], 0.0)]
  113. )
  114. def test_floating_array_numpy_sum(values, expected):
  115. arr = pd.array(values, dtype="Float64")
  116. result = np.sum(arr)
  117. assert result == expected
  118. @pytest.mark.parametrize("op", ["sum", "min", "max", "prod"])
  119. def test_preserve_dtypes(op):
  120. df = pd.DataFrame(
  121. {
  122. "A": ["a", "b", "b"],
  123. "B": [1, None, 3],
  124. "C": pd.array([0.1, None, 3.0], dtype="Float64"),
  125. }
  126. )
  127. # op
  128. result = getattr(df.C, op)()
  129. assert isinstance(result, np.float64)
  130. # groupby
  131. result = getattr(df.groupby("A"), op)()
  132. expected = pd.DataFrame(
  133. {"B": np.array([1.0, 3.0]), "C": pd.array([0.1, 3], dtype="Float64")},
  134. index=pd.Index(["a", "b"], name="A"),
  135. )
  136. tm.assert_frame_equal(result, expected)
  137. @pytest.mark.parametrize("skipna", [True, False])
  138. @pytest.mark.parametrize("method", ["min", "max"])
  139. def test_floating_array_min_max(skipna, method, dtype):
  140. arr = pd.array([0.0, 1.0, None], dtype=dtype)
  141. func = getattr(arr, method)
  142. result = func(skipna=skipna)
  143. if skipna:
  144. assert result == (0 if method == "min" else 1)
  145. else:
  146. assert result is pd.NA
  147. @pytest.mark.parametrize("skipna", [True, False])
  148. @pytest.mark.parametrize("min_count", [0, 9])
  149. def test_floating_array_prod(skipna, min_count, dtype):
  150. arr = pd.array([1.0, 2.0, None], dtype=dtype)
  151. result = arr.prod(skipna=skipna, min_count=min_count)
  152. if skipna and min_count == 0:
  153. assert result == 2
  154. else:
  155. assert result is pd.NA