# test_reductions.py
import numpy as np
import pytest

import pandas as pd
from pandas import Timedelta
import pandas._testing as tm
from pandas.core import nanops
from pandas.core.arrays import TimedeltaArray
  8. class TestReductions:
  9. @pytest.mark.parametrize("name", ["std", "min", "max", "median", "mean"])
  10. @pytest.mark.parametrize("skipna", [True, False])
  11. def test_reductions_empty(self, name, skipna):
  12. tdi = pd.TimedeltaIndex([])
  13. arr = tdi.array
  14. result = getattr(tdi, name)(skipna=skipna)
  15. assert result is pd.NaT
  16. result = getattr(arr, name)(skipna=skipna)
  17. assert result is pd.NaT
  18. @pytest.mark.parametrize("skipna", [True, False])
  19. def test_sum_empty(self, skipna):
  20. tdi = pd.TimedeltaIndex([])
  21. arr = tdi.array
  22. result = tdi.sum(skipna=skipna)
  23. assert isinstance(result, Timedelta)
  24. assert result == Timedelta(0)
  25. result = arr.sum(skipna=skipna)
  26. assert isinstance(result, Timedelta)
  27. assert result == Timedelta(0)
  28. def test_min_max(self):
  29. arr = TimedeltaArray._from_sequence(["3H", "3H", "NaT", "2H", "5H", "4H"])
  30. result = arr.min()
  31. expected = Timedelta("2H")
  32. assert result == expected
  33. result = arr.max()
  34. expected = Timedelta("5H")
  35. assert result == expected
  36. result = arr.min(skipna=False)
  37. assert result is pd.NaT
  38. result = arr.max(skipna=False)
  39. assert result is pd.NaT
  40. def test_sum(self):
  41. tdi = pd.TimedeltaIndex(["3H", "3H", "NaT", "2H", "5H", "4H"])
  42. arr = tdi.array
  43. result = arr.sum(skipna=True)
  44. expected = Timedelta(hours=17)
  45. assert isinstance(result, Timedelta)
  46. assert result == expected
  47. result = tdi.sum(skipna=True)
  48. assert isinstance(result, Timedelta)
  49. assert result == expected
  50. result = arr.sum(skipna=False)
  51. assert result is pd.NaT
  52. result = tdi.sum(skipna=False)
  53. assert result is pd.NaT
  54. result = arr.sum(min_count=9)
  55. assert result is pd.NaT
  56. result = tdi.sum(min_count=9)
  57. assert result is pd.NaT
  58. result = arr.sum(min_count=1)
  59. assert isinstance(result, Timedelta)
  60. assert result == expected
  61. result = tdi.sum(min_count=1)
  62. assert isinstance(result, Timedelta)
  63. assert result == expected
  64. def test_npsum(self):
  65. # GH#25282, GH#25335 np.sum should return a Timedelta, not timedelta64
  66. tdi = pd.TimedeltaIndex(["3H", "3H", "2H", "5H", "4H"])
  67. arr = tdi.array
  68. result = np.sum(tdi)
  69. expected = Timedelta(hours=17)
  70. assert isinstance(result, Timedelta)
  71. assert result == expected
  72. result = np.sum(arr)
  73. assert isinstance(result, Timedelta)
  74. assert result == expected
  75. def test_sum_2d_skipna_false(self):
  76. arr = np.arange(8).astype(np.int64).view("m8[s]").astype("m8[ns]").reshape(4, 2)
  77. arr[-1, -1] = "Nat"
  78. tda = TimedeltaArray(arr)
  79. result = tda.sum(skipna=False)
  80. assert result is pd.NaT
  81. result = tda.sum(axis=0, skipna=False)
  82. expected = pd.TimedeltaIndex([Timedelta(seconds=12), pd.NaT])._values
  83. tm.assert_timedelta_array_equal(result, expected)
  84. result = tda.sum(axis=1, skipna=False)
  85. expected = pd.TimedeltaIndex(
  86. [
  87. Timedelta(seconds=1),
  88. Timedelta(seconds=5),
  89. Timedelta(seconds=9),
  90. pd.NaT,
  91. ]
  92. )._values
  93. tm.assert_timedelta_array_equal(result, expected)
  94. # Adding a Timestamp makes this a test for DatetimeArray.std
  95. @pytest.mark.parametrize(
  96. "add",
  97. [
  98. Timedelta(0),
  99. pd.Timestamp("2021-01-01"),
  100. pd.Timestamp("2021-01-01", tz="UTC"),
  101. pd.Timestamp("2021-01-01", tz="Asia/Tokyo"),
  102. ],
  103. )
  104. def test_std(self, add):
  105. tdi = pd.TimedeltaIndex(["0H", "4H", "NaT", "4H", "0H", "2H"]) + add
  106. arr = tdi.array
  107. result = arr.std(skipna=True)
  108. expected = Timedelta(hours=2)
  109. assert isinstance(result, Timedelta)
  110. assert result == expected
  111. result = tdi.std(skipna=True)
  112. assert isinstance(result, Timedelta)
  113. assert result == expected
  114. if getattr(arr, "tz", None) is None:
  115. result = nanops.nanstd(np.asarray(arr), skipna=True)
  116. assert isinstance(result, np.timedelta64)
  117. assert result == expected
  118. result = arr.std(skipna=False)
  119. assert result is pd.NaT
  120. result = tdi.std(skipna=False)
  121. assert result is pd.NaT
  122. if getattr(arr, "tz", None) is None:
  123. result = nanops.nanstd(np.asarray(arr), skipna=False)
  124. assert isinstance(result, np.timedelta64)
  125. assert np.isnat(result)
  126. def test_median(self):
  127. tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
  128. arr = tdi.array
  129. result = arr.median(skipna=True)
  130. expected = Timedelta(hours=2)
  131. assert isinstance(result, Timedelta)
  132. assert result == expected
  133. result = tdi.median(skipna=True)
  134. assert isinstance(result, Timedelta)
  135. assert result == expected
  136. result = arr.median(skipna=False)
  137. assert result is pd.NaT
  138. result = tdi.median(skipna=False)
  139. assert result is pd.NaT
  140. def test_mean(self):
  141. tdi = pd.TimedeltaIndex(["0H", "3H", "NaT", "5H06m", "0H", "2H"])
  142. arr = tdi._data
  143. # manually verified result
  144. expected = Timedelta(arr.dropna()._ndarray.mean())
  145. result = arr.mean()
  146. assert result == expected
  147. result = arr.mean(skipna=False)
  148. assert result is pd.NaT
  149. result = arr.dropna().mean(skipna=False)
  150. assert result == expected
  151. result = arr.mean(axis=0)
  152. assert result == expected
  153. def test_mean_2d(self):
  154. tdi = pd.timedelta_range("14 days", periods=6)
  155. tda = tdi._data.reshape(3, 2)
  156. result = tda.mean(axis=0)
  157. expected = tda[1]
  158. tm.assert_timedelta_array_equal(result, expected)
  159. result = tda.mean(axis=1)
  160. expected = tda[:, 0] + Timedelta(hours=12)
  161. tm.assert_timedelta_array_equal(result, expected)
  162. result = tda.mean(axis=None)
  163. expected = tdi.mean()
  164. assert result == expected