# Source: test_reductions.py (3.5 KB as extracted)
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. from pandas import Series
  5. import pandas._testing as tm
  6. @pytest.mark.parametrize("operation, expected", [("min", "a"), ("max", "b")])
  7. def test_reductions_series_strings(operation, expected):
  8. # GH#31746
  9. ser = Series(["a", "b"], dtype="string")
  10. res_operation_serie = getattr(ser, operation)()
  11. assert res_operation_serie == expected
  12. @pytest.mark.parametrize("as_period", [True, False])
  13. def test_mode_extension_dtype(as_period):
  14. # GH#41927 preserve dt64tz dtype
  15. ser = Series([pd.Timestamp(1979, 4, n) for n in range(1, 5)])
  16. if as_period:
  17. ser = ser.dt.to_period("D")
  18. else:
  19. ser = ser.dt.tz_localize("US/Central")
  20. res = ser.mode()
  21. assert res.dtype == ser.dtype
  22. tm.assert_series_equal(res, ser)
  23. def test_reductions_td64_with_nat():
  24. # GH#8617
  25. ser = Series([0, pd.NaT], dtype="m8[ns]")
  26. exp = ser[0]
  27. assert ser.median() == exp
  28. assert ser.min() == exp
  29. assert ser.max() == exp
  30. @pytest.mark.parametrize("skipna", [True, False])
  31. def test_td64_sum_empty(skipna):
  32. # GH#37151
  33. ser = Series([], dtype="timedelta64[ns]")
  34. result = ser.sum(skipna=skipna)
  35. assert isinstance(result, pd.Timedelta)
  36. assert result == pd.Timedelta(0)
  37. def test_td64_summation_overflow():
  38. # GH#9442
  39. ser = Series(pd.date_range("20130101", periods=100000, freq="H"))
  40. ser[0] += pd.Timedelta("1s 1ms")
  41. # mean
  42. result = (ser - ser.min()).mean()
  43. expected = pd.Timedelta((pd.TimedeltaIndex(ser - ser.min()).asi8 / len(ser)).sum())
  44. # the computation is converted to float so
  45. # might be some loss of precision
  46. assert np.allclose(result._value / 1000, expected._value / 1000)
  47. # sum
  48. msg = "overflow in timedelta operation"
  49. with pytest.raises(ValueError, match=msg):
  50. (ser - ser.min()).sum()
  51. s1 = ser[0:10000]
  52. with pytest.raises(ValueError, match=msg):
  53. (s1 - s1.min()).sum()
  54. s2 = ser[0:1000]
  55. (s2 - s2.min()).sum()
  56. def test_prod_numpy16_bug():
  57. ser = Series([1.0, 1.0, 1.0], index=range(3))
  58. result = ser.prod()
  59. assert not isinstance(result, Series)
  60. @pytest.mark.parametrize("func", [np.any, np.all])
  61. @pytest.mark.parametrize("kwargs", [{"keepdims": True}, {"out": object()}])
  62. def test_validate_any_all_out_keepdims_raises(kwargs, func):
  63. ser = Series([1, 2])
  64. param = list(kwargs)[0]
  65. name = func.__name__
  66. msg = (
  67. f"the '{param}' parameter is not "
  68. "supported in the pandas "
  69. rf"implementation of {name}\(\)"
  70. )
  71. with pytest.raises(ValueError, match=msg):
  72. func(ser, **kwargs)
  73. def test_validate_sum_initial():
  74. ser = Series([1, 2])
  75. msg = (
  76. r"the 'initial' parameter is not "
  77. r"supported in the pandas "
  78. r"implementation of sum\(\)"
  79. )
  80. with pytest.raises(ValueError, match=msg):
  81. np.sum(ser, initial=10)
  82. def test_validate_median_initial():
  83. ser = Series([1, 2])
  84. msg = (
  85. r"the 'overwrite_input' parameter is not "
  86. r"supported in the pandas "
  87. r"implementation of median\(\)"
  88. )
  89. with pytest.raises(ValueError, match=msg):
  90. # It seems like np.median doesn't dispatch, so we use the
  91. # method instead of the ufunc.
  92. ser.median(overwrite_input=True)
  93. def test_validate_stat_keepdims():
  94. ser = Series([1, 2])
  95. msg = (
  96. r"the 'keepdims' parameter is not "
  97. r"supported in the pandas "
  98. r"implementation of sum\(\)"
  99. )
  100. with pytest.raises(ValueError, match=msg):
  101. np.sum(ser, keepdims=True)