test_interval_range.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355
  1. from datetime import timedelta
  2. import numpy as np
  3. import pytest
  4. from pandas.core.dtypes.common import is_integer
  5. from pandas import (
  6. DateOffset,
  7. Interval,
  8. IntervalIndex,
  9. Timedelta,
  10. Timestamp,
  11. date_range,
  12. interval_range,
  13. timedelta_range,
  14. )
  15. import pandas._testing as tm
  16. from pandas.tseries.offsets import Day
  17. @pytest.fixture(params=[None, "foo"])
  18. def name(request):
  19. return request.param
  20. class TestIntervalRange:
  21. @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
  22. def test_constructor_numeric(self, closed, name, freq, periods):
  23. start, end = 0, 100
  24. breaks = np.arange(101, step=freq)
  25. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  26. # defined from start/end/freq
  27. result = interval_range(
  28. start=start, end=end, freq=freq, name=name, closed=closed
  29. )
  30. tm.assert_index_equal(result, expected)
  31. # defined from start/periods/freq
  32. result = interval_range(
  33. start=start, periods=periods, freq=freq, name=name, closed=closed
  34. )
  35. tm.assert_index_equal(result, expected)
  36. # defined from end/periods/freq
  37. result = interval_range(
  38. end=end, periods=periods, freq=freq, name=name, closed=closed
  39. )
  40. tm.assert_index_equal(result, expected)
  41. # GH 20976: linspace behavior defined from start/end/periods
  42. result = interval_range(
  43. start=start, end=end, periods=periods, name=name, closed=closed
  44. )
  45. tm.assert_index_equal(result, expected)
  46. @pytest.mark.parametrize("tz", [None, "US/Eastern"])
  47. @pytest.mark.parametrize(
  48. "freq, periods", [("D", 364), ("2D", 182), ("22D18H", 16), ("M", 11)]
  49. )
  50. def test_constructor_timestamp(self, closed, name, freq, periods, tz):
  51. start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
  52. breaks = date_range(start=start, end=end, freq=freq)
  53. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  54. # defined from start/end/freq
  55. result = interval_range(
  56. start=start, end=end, freq=freq, name=name, closed=closed
  57. )
  58. tm.assert_index_equal(result, expected)
  59. # defined from start/periods/freq
  60. result = interval_range(
  61. start=start, periods=periods, freq=freq, name=name, closed=closed
  62. )
  63. tm.assert_index_equal(result, expected)
  64. # defined from end/periods/freq
  65. result = interval_range(
  66. end=end, periods=periods, freq=freq, name=name, closed=closed
  67. )
  68. tm.assert_index_equal(result, expected)
  69. # GH 20976: linspace behavior defined from start/end/periods
  70. if not breaks.freq.is_anchored() and tz is None:
  71. # matches expected only for non-anchored offsets and tz naive
  72. # (anchored/DST transitions cause unequal spacing in expected)
  73. result = interval_range(
  74. start=start, end=end, periods=periods, name=name, closed=closed
  75. )
  76. tm.assert_index_equal(result, expected)
  77. @pytest.mark.parametrize(
  78. "freq, periods", [("D", 100), ("2D12H", 40), ("5D", 20), ("25D", 4)]
  79. )
  80. def test_constructor_timedelta(self, closed, name, freq, periods):
  81. start, end = Timedelta("0 days"), Timedelta("100 days")
  82. breaks = timedelta_range(start=start, end=end, freq=freq)
  83. expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)
  84. # defined from start/end/freq
  85. result = interval_range(
  86. start=start, end=end, freq=freq, name=name, closed=closed
  87. )
  88. tm.assert_index_equal(result, expected)
  89. # defined from start/periods/freq
  90. result = interval_range(
  91. start=start, periods=periods, freq=freq, name=name, closed=closed
  92. )
  93. tm.assert_index_equal(result, expected)
  94. # defined from end/periods/freq
  95. result = interval_range(
  96. end=end, periods=periods, freq=freq, name=name, closed=closed
  97. )
  98. tm.assert_index_equal(result, expected)
  99. # GH 20976: linspace behavior defined from start/end/periods
  100. result = interval_range(
  101. start=start, end=end, periods=periods, name=name, closed=closed
  102. )
  103. tm.assert_index_equal(result, expected)
  104. @pytest.mark.parametrize(
  105. "start, end, freq, expected_endpoint",
  106. [
  107. (0, 10, 3, 9),
  108. (0, 10, 1.5, 9),
  109. (0.5, 10, 3, 9.5),
  110. (Timedelta("0D"), Timedelta("10D"), "2D4H", Timedelta("8D16H")),
  111. (
  112. Timestamp("2018-01-01"),
  113. Timestamp("2018-02-09"),
  114. "MS",
  115. Timestamp("2018-02-01"),
  116. ),
  117. (
  118. Timestamp("2018-01-01", tz="US/Eastern"),
  119. Timestamp("2018-01-20", tz="US/Eastern"),
  120. "5D12H",
  121. Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
  122. ),
  123. ],
  124. )
  125. def test_early_truncation(self, start, end, freq, expected_endpoint):
  126. # index truncates early if freq causes end to be skipped
  127. result = interval_range(start=start, end=end, freq=freq)
  128. result_endpoint = result.right[-1]
  129. assert result_endpoint == expected_endpoint
  130. @pytest.mark.parametrize(
  131. "start, end, freq",
  132. [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
  133. )
  134. def test_no_invalid_float_truncation(self, start, end, freq):
  135. # GH 21161
  136. if freq is None:
  137. breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
  138. else:
  139. breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
  140. expected = IntervalIndex.from_breaks(breaks)
  141. result = interval_range(start=start, end=end, periods=4, freq=freq)
  142. tm.assert_index_equal(result, expected)
  143. @pytest.mark.parametrize(
  144. "start, mid, end",
  145. [
  146. (
  147. Timestamp("2018-03-10", tz="US/Eastern"),
  148. Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
  149. Timestamp("2018-03-12", tz="US/Eastern"),
  150. ),
  151. (
  152. Timestamp("2018-11-03", tz="US/Eastern"),
  153. Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
  154. Timestamp("2018-11-05", tz="US/Eastern"),
  155. ),
  156. ],
  157. )
  158. def test_linspace_dst_transition(self, start, mid, end):
  159. # GH 20976: linspace behavior defined from start/end/periods
  160. # accounts for the hour gained/lost during DST transition
  161. result = interval_range(start=start, end=end, periods=2)
  162. expected = IntervalIndex.from_breaks([start, mid, end])
  163. tm.assert_index_equal(result, expected)
  164. @pytest.mark.parametrize("freq", [2, 2.0])
  165. @pytest.mark.parametrize("end", [10, 10.0])
  166. @pytest.mark.parametrize("start", [0, 0.0])
  167. def test_float_subtype(self, start, end, freq):
  168. # Has float subtype if any of start/end/freq are float, even if all
  169. # resulting endpoints can safely be upcast to integers
  170. # defined from start/end/freq
  171. index = interval_range(start=start, end=end, freq=freq)
  172. result = index.dtype.subtype
  173. expected = "int64" if is_integer(start + end + freq) else "float64"
  174. assert result == expected
  175. # defined from start/periods/freq
  176. index = interval_range(start=start, periods=5, freq=freq)
  177. result = index.dtype.subtype
  178. expected = "int64" if is_integer(start + freq) else "float64"
  179. assert result == expected
  180. # defined from end/periods/freq
  181. index = interval_range(end=end, periods=5, freq=freq)
  182. result = index.dtype.subtype
  183. expected = "int64" if is_integer(end + freq) else "float64"
  184. assert result == expected
  185. # GH 20976: linspace behavior defined from start/end/periods
  186. index = interval_range(start=start, end=end, periods=5)
  187. result = index.dtype.subtype
  188. expected = "int64" if is_integer(start + end) else "float64"
  189. assert result == expected
  190. def test_constructor_coverage(self):
  191. # float value for periods
  192. expected = interval_range(start=0, periods=10)
  193. result = interval_range(start=0, periods=10.5)
  194. tm.assert_index_equal(result, expected)
  195. # equivalent timestamp-like start/end
  196. start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
  197. expected = interval_range(start=start, end=end)
  198. result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
  199. tm.assert_index_equal(result, expected)
  200. result = interval_range(start=start.asm8, end=end.asm8)
  201. tm.assert_index_equal(result, expected)
  202. # equivalent freq with timestamp
  203. equiv_freq = [
  204. "D",
  205. Day(),
  206. Timedelta(days=1),
  207. timedelta(days=1),
  208. DateOffset(days=1),
  209. ]
  210. for freq in equiv_freq:
  211. result = interval_range(start=start, end=end, freq=freq)
  212. tm.assert_index_equal(result, expected)
  213. # equivalent timedelta-like start/end
  214. start, end = Timedelta(days=1), Timedelta(days=10)
  215. expected = interval_range(start=start, end=end)
  216. result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
  217. tm.assert_index_equal(result, expected)
  218. result = interval_range(start=start.asm8, end=end.asm8)
  219. tm.assert_index_equal(result, expected)
  220. # equivalent freq with timedelta
  221. equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
  222. for freq in equiv_freq:
  223. result = interval_range(start=start, end=end, freq=freq)
  224. tm.assert_index_equal(result, expected)
  225. def test_errors(self):
  226. # not enough params
  227. msg = (
  228. "Of the four parameters: start, end, periods, and freq, "
  229. "exactly three must be specified"
  230. )
  231. with pytest.raises(ValueError, match=msg):
  232. interval_range(start=0)
  233. with pytest.raises(ValueError, match=msg):
  234. interval_range(end=5)
  235. with pytest.raises(ValueError, match=msg):
  236. interval_range(periods=2)
  237. with pytest.raises(ValueError, match=msg):
  238. interval_range()
  239. # too many params
  240. with pytest.raises(ValueError, match=msg):
  241. interval_range(start=0, end=5, periods=6, freq=1.5)
  242. # mixed units
  243. msg = "start, end, freq need to be type compatible"
  244. with pytest.raises(TypeError, match=msg):
  245. interval_range(start=0, end=Timestamp("20130101"), freq=2)
  246. with pytest.raises(TypeError, match=msg):
  247. interval_range(start=0, end=Timedelta("1 day"), freq=2)
  248. with pytest.raises(TypeError, match=msg):
  249. interval_range(start=0, end=10, freq="D")
  250. with pytest.raises(TypeError, match=msg):
  251. interval_range(start=Timestamp("20130101"), end=10, freq="D")
  252. with pytest.raises(TypeError, match=msg):
  253. interval_range(
  254. start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D"
  255. )
  256. with pytest.raises(TypeError, match=msg):
  257. interval_range(
  258. start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2
  259. )
  260. with pytest.raises(TypeError, match=msg):
  261. interval_range(start=Timedelta("1 day"), end=10, freq="D")
  262. with pytest.raises(TypeError, match=msg):
  263. interval_range(
  264. start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D"
  265. )
  266. with pytest.raises(TypeError, match=msg):
  267. interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2)
  268. # invalid periods
  269. msg = "periods must be a number, got foo"
  270. with pytest.raises(TypeError, match=msg):
  271. interval_range(start=0, periods="foo")
  272. # invalid start
  273. msg = "start must be numeric or datetime-like, got foo"
  274. with pytest.raises(ValueError, match=msg):
  275. interval_range(start="foo", periods=10)
  276. # invalid end
  277. msg = r"end must be numeric or datetime-like, got \(0, 1\]"
  278. with pytest.raises(ValueError, match=msg):
  279. interval_range(end=Interval(0, 1), periods=10)
  280. # invalid freq for datetime-like
  281. msg = "freq must be numeric or convertible to DateOffset, got foo"
  282. with pytest.raises(ValueError, match=msg):
  283. interval_range(start=0, end=10, freq="foo")
  284. with pytest.raises(ValueError, match=msg):
  285. interval_range(start=Timestamp("20130101"), periods=10, freq="foo")
  286. with pytest.raises(ValueError, match=msg):
  287. interval_range(end=Timedelta("1 day"), periods=10, freq="foo")
  288. # mixed tz
  289. start = Timestamp("2017-01-01", tz="US/Eastern")
  290. end = Timestamp("2017-01-07", tz="US/Pacific")
  291. msg = "Start and end cannot both be tz-aware with different timezones"
  292. with pytest.raises(TypeError, match=msg):
  293. interval_range(start=start, end=end)