# test_reductions.py
  1. import numpy as np
  2. import pytest
  3. from pandas.core.dtypes.dtypes import DatetimeTZDtype
  4. import pandas as pd
  5. from pandas import NaT
  6. import pandas._testing as tm
  7. from pandas.core.arrays import DatetimeArray
  8. class TestReductions:
  @pytest.fixture(params=["s", "ms", "us", "ns"])
  def unit(self, request):
      """Fixture yielding each resolution string ("s", "ms", "us", "ns") in turn."""
      resolution = request.param
      return resolution
  @pytest.fixture
  def arr1d(self, tz_naive_fixture):
      """
      Fixture building a six-element DatetimeArray that contains one NaT.

      The timezone comes from ``tz_naive_fixture``; when it is None the array
      is timezone-naive with dtype ``M8[ns]``.
      """
      if tz_naive_fixture is None:
          target_dtype = np.dtype("M8[ns]")
      else:
          target_dtype = DatetimeTZDtype(tz=tz_naive_fixture)

      values = [
          "2000-01-03",
          "2000-01-03",
          "NaT",
          "2000-01-02",
          "2000-01-05",
          "2000-01-04",
      ]
      return DatetimeArray._from_sequence(values, dtype=target_dtype)
  def test_min_max(self, arr1d, unit):
      """
      min/max ignore NaT by default and return a Timestamp in the array's
      unit; with ``skipna=False`` the NaT entry makes both return NaT.
      """
      converted = arr1d.as_unit(unit)
      zone = converted.tz

      # Default skipna: the NaT entry is skipped and the unit is preserved.
      for reduction, date in (("min", "2000-01-02"), ("max", "2000-01-05")):
          got = getattr(converted, reduction)()
          want = pd.Timestamp(date, tz=zone).as_unit(unit)
          assert got == want
          assert got.unit == want.unit

      # skipna=False: the NaT entry propagates to the result.
      for reduction in ("min", "max"):
          got = getattr(converted, reduction)(skipna=False)
          assert got is NaT
  @pytest.mark.parametrize("tz", [None, "US/Central"])
  @pytest.mark.parametrize("skipna", [True, False])
  def test_min_max_empty(self, skipna, tz):
      """min and max of an empty DatetimeArray are NaT for either skipna value."""
      if tz is None:
          empty_dtype = np.dtype("M8[ns]")
      else:
          empty_dtype = DatetimeTZDtype(tz=tz)
      empty = DatetimeArray._from_sequence([], dtype=empty_dtype)

      assert empty.min(skipna=skipna) is NaT
      assert empty.max(skipna=skipna) is NaT
  @pytest.mark.parametrize("tz", [None, "US/Central"])
  @pytest.mark.parametrize("skipna", [True, False])
  def test_median_empty(self, skipna, tz):
      """
      median of an empty array is NaT in 1D; for a (0, 3) array it is three
      NaTs along axis 0 and an empty array along axis 1.
      """
      if tz is None:
          empty_dtype = np.dtype("M8[ns]")
      else:
          empty_dtype = DatetimeTZDtype(tz=tz)
      flat = DatetimeArray._from_sequence([], dtype=empty_dtype)
      assert flat.median(skipna=skipna) is NaT

      grid = flat.reshape(0, 3)
      klass = type(grid)

      # Reducing over the zero-length axis: one NaT per column.
      per_column = grid.median(axis=0, skipna=skipna)
      all_nat = klass._from_sequence([NaT, NaT, NaT], dtype=grid.dtype)
      tm.assert_equal(per_column, all_nat)

      # Reducing over the length-3 axis: there are no rows, so nothing is left.
      per_row = grid.median(axis=1, skipna=skipna)
      nothing = klass._from_sequence([], dtype=grid.dtype)
      tm.assert_equal(per_row, nothing)
  def test_median(self, arr1d):
      """
      median of the 1D fixture equals its first element when NaT is skipped
      or dropped, and is NaT when NaT is kept via ``skipna=False``.
      """
      first = arr1d[0]

      assert arr1d.median() == first
      assert arr1d.median(skipna=False) is NaT

      # With the NaT removed up front, skipna=False no longer matters.
      assert arr1d.dropna().median(skipna=False) == first

      assert arr1d.median(axis=0) == first
  def test_median_axis(self, arr1d):
      """axis=0 matches the default median on 1D input; axis=1 raises ValueError."""
      along_zero = arr1d.median(axis=0)
      assert along_zero == arr1d.median()

      along_zero_keep_nat = arr1d.median(axis=0, skipna=False)
      assert along_zero_keep_nat is NaT

      # A 1D array has no axis 1.
      with pytest.raises(ValueError, match=r"abs\(axis\) must be less than ndim"):
          arr1d.median(axis=1)
  @pytest.mark.filterwarnings("ignore:All-NaN slice encountered:RuntimeWarning")
  def test_median_2d(self, arr1d):
      """
      Check median on a single-row 2D reshape of ``arr1d`` for axis=None, 0 and 1.

      The "All-NaN slice encountered" RuntimeWarning is ignored for this test
      via the ``filterwarnings`` mark.
      """
      arr = arr1d.reshape(1, -1)

      # axis = None
      assert arr.median() == arr1d.median()
      assert arr.median(skipna=False) is NaT

      # axis = 0
      result = arr.median(axis=0)
      expected = arr1d
      tm.assert_equal(result, expected)

      # Each column holds exactly one value, so the column holding the NaT
      # gives NaT with or without skipna and the result is again arr1d.
      result = arr.median(axis=0, skipna=False)
      expected = arr1d
      tm.assert_equal(result, expected)

      # axis = 1
      result = arr.median(axis=1)
      # Pass the dtype explicitly, as every other expected array in this file
      # does, rather than relying on inference from the scalar.
      expected = type(arr)._from_sequence([arr1d.median()], dtype=arr.dtype)
      tm.assert_equal(result, expected)

      result = arr.median(axis=1, skipna=False)
      expected = type(arr)._from_sequence([NaT], dtype=arr.dtype)
      tm.assert_equal(result, expected)
  def test_mean(self, arr1d):
      """
      mean of the 1D fixture is its first element plus 0.4 days when NaT is
      skipped or dropped, and NaT when NaT is kept via ``skipna=False``.
      """
      # manually verified result
      want = arr1d[0] + 0.4 * pd.Timedelta(days=1)

      assert arr1d.mean() == want
      assert arr1d.mean(skipna=False) is NaT

      # With the NaT removed up front, skipna=False no longer matters.
      assert arr1d.dropna().mean(skipna=False) == want

      assert arr1d.mean(axis=0) == want
  def test_mean_2d(self):
      """Check mean along axis 0, axis 1 and axis=None on a (3, 2) tz-aware array."""
      index = pd.date_range("2016-01-01", periods=6, tz="US/Pacific")
      grid = index._data.reshape(3, 2)

      # Down the columns: expected to equal the middle row.
      column_means = grid.mean(axis=0)
      tm.assert_datetime_array_equal(column_means, grid[1])

      # Across each row: expected to be the first column shifted by 12 hours.
      row_means = grid.mean(axis=1)
      half_day_later = grid[:, 0] + pd.Timedelta(hours=12)
      tm.assert_datetime_array_equal(row_means, half_day_later)

      # Over everything: must agree with the mean of the originating index.
      overall = grid.mean(axis=None)
      assert overall == index.mean()
  @pytest.mark.parametrize("skipna", [True, False])
  def test_mean_empty(self, arr1d, skipna):
      """
      mean of an empty slice is NaT; for its (0, 3) reshape it is three NaTs
      along axis 0, an empty array along axis 1, and NaT for axis=None.
      """
      empty = arr1d[:0]
      assert empty.mean(skipna=skipna) is NaT

      grid = empty.reshape(0, 3)

      column_means = grid.mean(axis=0, skipna=skipna)
      all_nat = DatetimeArray._from_sequence([NaT, NaT, NaT], dtype=empty.dtype)
      tm.assert_datetime_array_equal(column_means, all_nat)

      # The row-wise result is compared to the 1D, empty slice itself.
      row_means = grid.mean(axis=1, skipna=skipna)
      tm.assert_datetime_array_equal(row_means, empty)

      overall = grid.mean(axis=None, skipna=skipna)
      assert overall is NaT