test_constructors.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. import numpy as np
  2. import pytest
  3. from pandas._libs.tslibs.period import IncompatibleFrequency
  4. from pandas.core.dtypes.dtypes import PeriodDtype
  5. from pandas import (
  6. Index,
  7. NaT,
  8. Period,
  9. PeriodIndex,
  10. Series,
  11. date_range,
  12. offsets,
  13. period_range,
  14. )
  15. import pandas._testing as tm
  16. from pandas.core.arrays import PeriodArray
  17. class TestPeriodIndex:
  18. def test_construction_base_constructor(self):
  19. # GH 13664
  20. arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="M")]
  21. tm.assert_index_equal(Index(arr), PeriodIndex(arr))
  22. tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr)))
  23. arr = [np.nan, NaT, Period("2011-03", freq="M")]
  24. tm.assert_index_equal(Index(arr), PeriodIndex(arr))
  25. tm.assert_index_equal(Index(np.array(arr)), PeriodIndex(np.array(arr)))
  26. arr = [Period("2011-01", freq="M"), NaT, Period("2011-03", freq="D")]
  27. tm.assert_index_equal(Index(arr), Index(arr, dtype=object))
  28. tm.assert_index_equal(Index(np.array(arr)), Index(np.array(arr), dtype=object))
  29. def test_base_constructor_with_period_dtype(self):
  30. dtype = PeriodDtype("D")
  31. values = ["2011-01-01", "2012-03-04", "2014-05-01"]
  32. result = Index(values, dtype=dtype)
  33. expected = PeriodIndex(values, dtype=dtype)
  34. tm.assert_index_equal(result, expected)
  35. @pytest.mark.parametrize(
  36. "values_constructor", [list, np.array, PeriodIndex, PeriodArray._from_sequence]
  37. )
  38. def test_index_object_dtype(self, values_constructor):
  39. # Index(periods, dtype=object) is an Index (not an PeriodIndex)
  40. periods = [
  41. Period("2011-01", freq="M"),
  42. NaT,
  43. Period("2011-03", freq="M"),
  44. ]
  45. values = values_constructor(periods)
  46. result = Index(values, dtype=object)
  47. assert type(result) is Index
  48. tm.assert_numpy_array_equal(result.values, np.array(values))
  49. def test_constructor_use_start_freq(self):
  50. # GH #1118
  51. p = Period("4/2/2012", freq="B")
  52. expected = period_range(start="4/2/2012", periods=10, freq="B")
  53. index = period_range(start=p, periods=10)
  54. tm.assert_index_equal(index, expected)
  55. def test_constructor_field_arrays(self):
  56. # GH #1264
  57. years = np.arange(1990, 2010).repeat(4)[2:-2]
  58. quarters = np.tile(np.arange(1, 5), 20)[2:-2]
  59. index = PeriodIndex(year=years, quarter=quarters, freq="Q-DEC")
  60. expected = period_range("1990Q3", "2009Q2", freq="Q-DEC")
  61. tm.assert_index_equal(index, expected)
  62. index2 = PeriodIndex(year=years, quarter=quarters, freq="2Q-DEC")
  63. tm.assert_numpy_array_equal(index.asi8, index2.asi8)
  64. index = PeriodIndex(year=years, quarter=quarters)
  65. tm.assert_index_equal(index, expected)
  66. years = [2007, 2007, 2007]
  67. months = [1, 2]
  68. msg = "Mismatched Period array lengths"
  69. with pytest.raises(ValueError, match=msg):
  70. PeriodIndex(year=years, month=months, freq="M")
  71. with pytest.raises(ValueError, match=msg):
  72. PeriodIndex(year=years, month=months, freq="2M")
  73. years = [2007, 2007, 2007]
  74. months = [1, 2, 3]
  75. idx = PeriodIndex(year=years, month=months, freq="M")
  76. exp = period_range("2007-01", periods=3, freq="M")
  77. tm.assert_index_equal(idx, exp)
  78. def test_constructor_U(self):
  79. # U was used as undefined period
  80. with pytest.raises(ValueError, match="Invalid frequency: X"):
  81. period_range("2007-1-1", periods=500, freq="X")
  82. def test_constructor_nano(self):
  83. idx = period_range(
  84. start=Period(ordinal=1, freq="N"), end=Period(ordinal=4, freq="N"), freq="N"
  85. )
  86. exp = PeriodIndex(
  87. [
  88. Period(ordinal=1, freq="N"),
  89. Period(ordinal=2, freq="N"),
  90. Period(ordinal=3, freq="N"),
  91. Period(ordinal=4, freq="N"),
  92. ],
  93. freq="N",
  94. )
  95. tm.assert_index_equal(idx, exp)
  96. def test_constructor_arrays_negative_year(self):
  97. years = np.arange(1960, 2000, dtype=np.int64).repeat(4)
  98. quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40)
  99. pindex = PeriodIndex(year=years, quarter=quarters)
  100. tm.assert_index_equal(pindex.year, Index(years))
  101. tm.assert_index_equal(pindex.quarter, Index(quarters))
  102. def test_constructor_invalid_quarters(self):
  103. msg = "Quarter must be 1 <= q <= 4"
  104. with pytest.raises(ValueError, match=msg):
  105. PeriodIndex(year=range(2000, 2004), quarter=list(range(4)), freq="Q-DEC")
  106. def test_constructor_corner(self):
  107. result = period_range("2007-01", periods=10.5, freq="M")
  108. exp = period_range("2007-01", periods=10, freq="M")
  109. tm.assert_index_equal(result, exp)
  110. def test_constructor_fromarraylike(self):
  111. idx = period_range("2007-01", periods=20, freq="M")
  112. # values is an array of Period, thus can retrieve freq
  113. tm.assert_index_equal(PeriodIndex(idx.values), idx)
  114. tm.assert_index_equal(PeriodIndex(list(idx.values)), idx)
  115. msg = "freq not specified and cannot be inferred"
  116. with pytest.raises(ValueError, match=msg):
  117. PeriodIndex(idx.asi8)
  118. with pytest.raises(ValueError, match=msg):
  119. PeriodIndex(list(idx.asi8))
  120. msg = "'Period' object is not iterable"
  121. with pytest.raises(TypeError, match=msg):
  122. PeriodIndex(data=Period("2007", freq="A"))
  123. result = PeriodIndex(iter(idx))
  124. tm.assert_index_equal(result, idx)
  125. result = PeriodIndex(idx)
  126. tm.assert_index_equal(result, idx)
  127. result = PeriodIndex(idx, freq="M")
  128. tm.assert_index_equal(result, idx)
  129. result = PeriodIndex(idx, freq=offsets.MonthEnd())
  130. tm.assert_index_equal(result, idx)
  131. assert result.freq == "M"
  132. result = PeriodIndex(idx, freq="2M")
  133. tm.assert_index_equal(result, idx.asfreq("2M"))
  134. assert result.freq == "2M"
  135. result = PeriodIndex(idx, freq=offsets.MonthEnd(2))
  136. tm.assert_index_equal(result, idx.asfreq("2M"))
  137. assert result.freq == "2M"
  138. result = PeriodIndex(idx, freq="D")
  139. exp = idx.asfreq("D", "e")
  140. tm.assert_index_equal(result, exp)
  141. def test_constructor_datetime64arr(self):
  142. vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64)
  143. vals = vals.view(np.dtype("M8[us]"))
  144. pi = PeriodIndex(vals, freq="D")
  145. expected = PeriodIndex(vals.astype("M8[ns]"), freq="D")
  146. tm.assert_index_equal(pi, expected)
  147. @pytest.mark.parametrize("box", [None, "series", "index"])
  148. def test_constructor_datetime64arr_ok(self, box):
  149. # https://github.com/pandas-dev/pandas/issues/23438
  150. data = date_range("2017", periods=4, freq="M")
  151. if box is None:
  152. data = data._values
  153. elif box == "series":
  154. data = Series(data)
  155. result = PeriodIndex(data, freq="D")
  156. expected = PeriodIndex(
  157. ["2017-01-31", "2017-02-28", "2017-03-31", "2017-04-30"], freq="D"
  158. )
  159. tm.assert_index_equal(result, expected)
  160. def test_constructor_dtype(self):
  161. # passing a dtype with a tz should localize
  162. idx = PeriodIndex(["2013-01", "2013-03"], dtype="period[M]")
  163. exp = PeriodIndex(["2013-01", "2013-03"], freq="M")
  164. tm.assert_index_equal(idx, exp)
  165. assert idx.dtype == "period[M]"
  166. idx = PeriodIndex(["2013-01-05", "2013-03-05"], dtype="period[3D]")
  167. exp = PeriodIndex(["2013-01-05", "2013-03-05"], freq="3D")
  168. tm.assert_index_equal(idx, exp)
  169. assert idx.dtype == "period[3D]"
  170. # if we already have a freq and its not the same, then asfreq
  171. # (not changed)
  172. idx = PeriodIndex(["2013-01-01", "2013-01-02"], freq="D")
  173. res = PeriodIndex(idx, dtype="period[M]")
  174. exp = PeriodIndex(["2013-01", "2013-01"], freq="M")
  175. tm.assert_index_equal(res, exp)
  176. assert res.dtype == "period[M]"
  177. res = PeriodIndex(idx, freq="M")
  178. tm.assert_index_equal(res, exp)
  179. assert res.dtype == "period[M]"
  180. msg = "specified freq and dtype are different"
  181. with pytest.raises(IncompatibleFrequency, match=msg):
  182. PeriodIndex(["2011-01"], freq="M", dtype="period[D]")
  183. def test_constructor_empty(self):
  184. idx = PeriodIndex([], freq="M")
  185. assert isinstance(idx, PeriodIndex)
  186. assert len(idx) == 0
  187. assert idx.freq == "M"
  188. with pytest.raises(ValueError, match="freq not specified"):
  189. PeriodIndex([])
  190. def test_constructor_pi_nat(self):
  191. idx = PeriodIndex(
  192. [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")]
  193. )
  194. exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
  195. tm.assert_index_equal(idx, exp)
  196. idx = PeriodIndex(
  197. np.array([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="M")])
  198. )
  199. tm.assert_index_equal(idx, exp)
  200. idx = PeriodIndex(
  201. [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")]
  202. )
  203. exp = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M")
  204. tm.assert_index_equal(idx, exp)
  205. idx = PeriodIndex(
  206. np.array(
  207. [NaT, NaT, Period("2011-01", freq="M"), Period("2011-01", freq="M")]
  208. )
  209. )
  210. tm.assert_index_equal(idx, exp)
  211. idx = PeriodIndex([NaT, NaT, "2011-01", "2011-01"], freq="M")
  212. tm.assert_index_equal(idx, exp)
  213. with pytest.raises(ValueError, match="freq not specified"):
  214. PeriodIndex([NaT, NaT])
  215. with pytest.raises(ValueError, match="freq not specified"):
  216. PeriodIndex(np.array([NaT, NaT]))
  217. with pytest.raises(ValueError, match="freq not specified"):
  218. PeriodIndex(["NaT", "NaT"])
  219. with pytest.raises(ValueError, match="freq not specified"):
  220. PeriodIndex(np.array(["NaT", "NaT"]))
  221. def test_constructor_incompat_freq(self):
  222. msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"
  223. with pytest.raises(IncompatibleFrequency, match=msg):
  224. PeriodIndex([Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")])
  225. with pytest.raises(IncompatibleFrequency, match=msg):
  226. PeriodIndex(
  227. np.array(
  228. [Period("2011-01", freq="M"), NaT, Period("2011-01", freq="D")]
  229. )
  230. )
  231. # first element is NaT
  232. with pytest.raises(IncompatibleFrequency, match=msg):
  233. PeriodIndex([NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")])
  234. with pytest.raises(IncompatibleFrequency, match=msg):
  235. PeriodIndex(
  236. np.array(
  237. [NaT, Period("2011-01", freq="M"), Period("2011-01", freq="D")]
  238. )
  239. )
  240. def test_constructor_mixed(self):
  241. idx = PeriodIndex(["2011-01", NaT, Period("2011-01", freq="M")])
  242. exp = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
  243. tm.assert_index_equal(idx, exp)
  244. idx = PeriodIndex(["NaT", NaT, Period("2011-01", freq="M")])
  245. exp = PeriodIndex(["NaT", "NaT", "2011-01"], freq="M")
  246. tm.assert_index_equal(idx, exp)
  247. idx = PeriodIndex([Period("2011-01-01", freq="D"), NaT, "2012-01-01"])
  248. exp = PeriodIndex(["2011-01-01", "NaT", "2012-01-01"], freq="D")
  249. tm.assert_index_equal(idx, exp)
  250. def test_constructor_simple_new(self):
  251. idx = period_range("2007-01", name="p", periods=2, freq="M")
  252. with pytest.raises(AssertionError, match="<class .*PeriodIndex'>"):
  253. idx._simple_new(idx, name="p")
  254. result = idx._simple_new(idx._data, name="p")
  255. tm.assert_index_equal(result, idx)
  256. msg = "Should be numpy array of type i8"
  257. with pytest.raises(AssertionError, match=msg):
  258. # Need ndarray, not int64 Index
  259. type(idx._data)._simple_new(Index(idx.asi8), freq=idx.freq)
  260. arr = type(idx._data)._simple_new(idx.asi8, freq=idx.freq)
  261. result = idx._simple_new(arr, name="p")
  262. tm.assert_index_equal(result, idx)
  263. def test_constructor_simple_new_empty(self):
  264. # GH13079
  265. idx = PeriodIndex([], freq="M", name="p")
  266. with pytest.raises(AssertionError, match="<class .*PeriodIndex'>"):
  267. idx._simple_new(idx, name="p")
  268. result = idx._simple_new(idx._data, name="p")
  269. tm.assert_index_equal(result, idx)
  270. @pytest.mark.parametrize("floats", [[1.1, 2.1], np.array([1.1, 2.1])])
  271. def test_constructor_floats(self, floats):
  272. with pytest.raises(AssertionError, match="<class "):
  273. PeriodIndex._simple_new(floats)
  274. msg = "PeriodIndex does not allow floating point in construction"
  275. with pytest.raises(TypeError, match=msg):
  276. PeriodIndex(floats)
  277. def test_constructor_nat(self):
  278. msg = "start and end must not be NaT"
  279. with pytest.raises(ValueError, match=msg):
  280. period_range(start="NaT", end="2011-01-01", freq="M")
  281. with pytest.raises(ValueError, match=msg):
  282. period_range(start="2011-01-01", end="NaT", freq="M")
  283. def test_constructor_year_and_quarter(self):
  284. year = Series([2001, 2002, 2003])
  285. quarter = year - 2000
  286. idx = PeriodIndex(year=year, quarter=quarter)
  287. strs = [f"{t[0]:d}Q{t[1]:d}" for t in zip(quarter, year)]
  288. lops = list(map(Period, strs))
  289. p = PeriodIndex(lops)
  290. tm.assert_index_equal(p, idx)
  291. def test_constructor_freq_mult(self):
  292. # GH #7811
  293. pidx = period_range(start="2014-01", freq="2M", periods=4)
  294. expected = PeriodIndex(["2014-01", "2014-03", "2014-05", "2014-07"], freq="2M")
  295. tm.assert_index_equal(pidx, expected)
  296. pidx = period_range(start="2014-01-02", end="2014-01-15", freq="3D")
  297. expected = PeriodIndex(
  298. ["2014-01-02", "2014-01-05", "2014-01-08", "2014-01-11", "2014-01-14"],
  299. freq="3D",
  300. )
  301. tm.assert_index_equal(pidx, expected)
  302. pidx = period_range(end="2014-01-01 17:00", freq="4H", periods=3)
  303. expected = PeriodIndex(
  304. ["2014-01-01 09:00", "2014-01-01 13:00", "2014-01-01 17:00"], freq="4H"
  305. )
  306. tm.assert_index_equal(pidx, expected)
  307. msg = "Frequency must be positive, because it represents span: -1M"
  308. with pytest.raises(ValueError, match=msg):
  309. PeriodIndex(["2011-01"], freq="-1M")
  310. msg = "Frequency must be positive, because it represents span: 0M"
  311. with pytest.raises(ValueError, match=msg):
  312. PeriodIndex(["2011-01"], freq="0M")
  313. msg = "Frequency must be positive, because it represents span: 0M"
  314. with pytest.raises(ValueError, match=msg):
  315. period_range("2011-01", periods=3, freq="0M")
  316. @pytest.mark.parametrize("freq", ["A", "M", "D", "T", "S"])
  317. @pytest.mark.parametrize("mult", [1, 2, 3, 4, 5])
  318. def test_constructor_freq_mult_dti_compat(self, mult, freq):
  319. freqstr = str(mult) + freq
  320. pidx = period_range(start="2014-04-01", freq=freqstr, periods=10)
  321. expected = date_range(start="2014-04-01", freq=freqstr, periods=10).to_period(
  322. freqstr
  323. )
  324. tm.assert_index_equal(pidx, expected)
  325. def test_constructor_freq_combined(self):
  326. for freq in ["1D1H", "1H1D"]:
  327. pidx = PeriodIndex(["2016-01-01", "2016-01-02"], freq=freq)
  328. expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 00:00"], freq="25H")
  329. for freq in ["1D1H", "1H1D"]:
  330. pidx = period_range(start="2016-01-01", periods=2, freq=freq)
  331. expected = PeriodIndex(["2016-01-01 00:00", "2016-01-02 01:00"], freq="25H")
  332. tm.assert_index_equal(pidx, expected)
  333. def test_constructor(self):
  334. pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
  335. assert len(pi) == 9
  336. pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009")
  337. assert len(pi) == 4 * 9
  338. pi = period_range(freq="M", start="1/1/2001", end="12/1/2009")
  339. assert len(pi) == 12 * 9
  340. pi = period_range(freq="D", start="1/1/2001", end="12/31/2009")
  341. assert len(pi) == 365 * 9 + 2
  342. pi = period_range(freq="B", start="1/1/2001", end="12/31/2009")
  343. assert len(pi) == 261 * 9
  344. pi = period_range(freq="H", start="1/1/2001", end="12/31/2001 23:00")
  345. assert len(pi) == 365 * 24
  346. pi = period_range(freq="Min", start="1/1/2001", end="1/1/2001 23:59")
  347. assert len(pi) == 24 * 60
  348. pi = period_range(freq="S", start="1/1/2001", end="1/1/2001 23:59:59")
  349. assert len(pi) == 24 * 60 * 60
  350. start = Period("02-Apr-2005", "B")
  351. i1 = period_range(start=start, periods=20)
  352. assert len(i1) == 20
  353. assert i1.freq == start.freq
  354. assert i1[0] == start
  355. end_intv = Period("2006-12-31", "W")
  356. i1 = period_range(end=end_intv, periods=10)
  357. assert len(i1) == 10
  358. assert i1.freq == end_intv.freq
  359. assert i1[-1] == end_intv
  360. end_intv = Period("2006-12-31", "1w")
  361. i2 = period_range(end=end_intv, periods=10)
  362. assert len(i1) == len(i2)
  363. assert (i1 == i2).all()
  364. assert i1.freq == i2.freq
  365. end_intv = Period("2005-05-01", "B")
  366. i1 = period_range(start=start, end=end_intv)
  367. # infer freq from first element
  368. i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")])
  369. assert len(i2) == 2
  370. assert i2[0] == end_intv
  371. i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")]))
  372. assert len(i2) == 2
  373. assert i2[0] == end_intv
  374. # Mixed freq should fail
  375. vals = [end_intv, Period("2006-12-31", "w")]
  376. msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
  377. with pytest.raises(IncompatibleFrequency, match=msg):
  378. PeriodIndex(vals)
  379. vals = np.array(vals)
  380. with pytest.raises(IncompatibleFrequency, match=msg):
  381. PeriodIndex(vals)
  382. # tuple freq disallowed GH#34703
  383. with pytest.raises(TypeError, match="pass as a string instead"):
  384. Period("2006-12-31", ("w", 1))
  385. @pytest.mark.parametrize(
  386. "freq", ["M", "Q", "A", "D", "B", "T", "S", "L", "U", "N", "H"]
  387. )
  388. def test_recreate_from_data(self, freq):
  389. org = period_range(start="2001/04/01", freq=freq, periods=1)
  390. idx = PeriodIndex(org.values, freq=freq)
  391. tm.assert_index_equal(idx, org)
  392. def test_map_with_string_constructor(self):
  393. raw = [2005, 2007, 2009]
  394. index = PeriodIndex(raw, freq="A")
  395. expected = Index([str(num) for num in raw])
  396. res = index.map(str)
  397. # should return an Index
  398. assert isinstance(res, Index)
  399. # preserve element types
  400. assert all(isinstance(resi, str) for resi in res)
  401. # lastly, values should compare equal
  402. tm.assert_index_equal(res, expected)
  403. class TestShallowCopy:
  404. def test_shallow_copy_empty(self):
  405. # GH#13067
  406. idx = PeriodIndex([], freq="M")
  407. result = idx._view()
  408. expected = idx
  409. tm.assert_index_equal(result, expected)
  410. def test_shallow_copy_disallow_i8(self):
  411. # GH#24391
  412. pi = period_range("2018-01-01", periods=3, freq="2D")
  413. with pytest.raises(AssertionError, match="ndarray"):
  414. pi._shallow_copy(pi.asi8)
  415. def test_shallow_copy_requires_disallow_period_index(self):
  416. pi = period_range("2018-01-01", periods=3, freq="2D")
  417. with pytest.raises(AssertionError, match="PeriodIndex"):
  418. pi._shallow_copy(pi)
  419. class TestSeriesPeriod:
  420. def test_constructor_cant_cast_period(self):
  421. msg = "Cannot cast PeriodIndex to dtype float64"
  422. with pytest.raises(TypeError, match=msg):
  423. Series(period_range("2000-01-01", periods=10, freq="D"), dtype=float)
  424. def test_constructor_cast_object(self):
  425. s = Series(period_range("1/1/2000", periods=10), dtype=PeriodDtype("D"))
  426. exp = Series(period_range("1/1/2000", periods=10))
  427. tm.assert_series_equal(s, exp)