test_align.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. from datetime import timezone
  2. import numpy as np
  3. import pytest
  4. import pandas as pd
  5. from pandas import (
  6. Series,
  7. date_range,
  8. period_range,
  9. )
  10. import pandas._testing as tm
  11. @pytest.mark.parametrize(
  12. "first_slice,second_slice",
  13. [
  14. [[2, None], [None, -5]],
  15. [[None, 0], [None, -5]],
  16. [[None, -5], [None, 0]],
  17. [[None, 0], [None, 0]],
  18. ],
  19. )
  20. @pytest.mark.parametrize("fill", [None, -1])
  21. def test_align(datetime_series, first_slice, second_slice, join_type, fill):
  22. a = datetime_series[slice(*first_slice)]
  23. b = datetime_series[slice(*second_slice)]
  24. aa, ab = a.align(b, join=join_type, fill_value=fill)
  25. join_index = a.index.join(b.index, how=join_type)
  26. if fill is not None:
  27. diff_a = aa.index.difference(join_index)
  28. diff_b = ab.index.difference(join_index)
  29. if len(diff_a) > 0:
  30. assert (aa.reindex(diff_a) == fill).all()
  31. if len(diff_b) > 0:
  32. assert (ab.reindex(diff_b) == fill).all()
  33. ea = a.reindex(join_index)
  34. eb = b.reindex(join_index)
  35. if fill is not None:
  36. ea = ea.fillna(fill)
  37. eb = eb.fillna(fill)
  38. tm.assert_series_equal(aa, ea)
  39. tm.assert_series_equal(ab, eb)
  40. assert aa.name == "ts"
  41. assert ea.name == "ts"
  42. assert ab.name == "ts"
  43. assert eb.name == "ts"
  44. @pytest.mark.parametrize(
  45. "first_slice,second_slice",
  46. [
  47. [[2, None], [None, -5]],
  48. [[None, 0], [None, -5]],
  49. [[None, -5], [None, 0]],
  50. [[None, 0], [None, 0]],
  51. ],
  52. )
  53. @pytest.mark.parametrize("method", ["pad", "bfill"])
  54. @pytest.mark.parametrize("limit", [None, 1])
  55. def test_align_fill_method(
  56. datetime_series, first_slice, second_slice, join_type, method, limit
  57. ):
  58. a = datetime_series[slice(*first_slice)]
  59. b = datetime_series[slice(*second_slice)]
  60. aa, ab = a.align(b, join=join_type, method=method, limit=limit)
  61. join_index = a.index.join(b.index, how=join_type)
  62. ea = a.reindex(join_index)
  63. eb = b.reindex(join_index)
  64. ea = ea.fillna(method=method, limit=limit)
  65. eb = eb.fillna(method=method, limit=limit)
  66. tm.assert_series_equal(aa, ea)
  67. tm.assert_series_equal(ab, eb)
  68. def test_align_nocopy(datetime_series, using_copy_on_write):
  69. b = datetime_series[:5].copy()
  70. # do copy
  71. a = datetime_series.copy()
  72. ra, _ = a.align(b, join="left")
  73. ra[:5] = 5
  74. assert not (a[:5] == 5).any()
  75. # do not copy
  76. a = datetime_series.copy()
  77. ra, _ = a.align(b, join="left", copy=False)
  78. ra[:5] = 5
  79. if using_copy_on_write:
  80. assert not (a[:5] == 5).any()
  81. else:
  82. assert (a[:5] == 5).all()
  83. # do copy
  84. a = datetime_series.copy()
  85. b = datetime_series[:5].copy()
  86. _, rb = a.align(b, join="right")
  87. rb[:3] = 5
  88. assert not (b[:3] == 5).any()
  89. # do not copy
  90. a = datetime_series.copy()
  91. b = datetime_series[:5].copy()
  92. _, rb = a.align(b, join="right", copy=False)
  93. rb[:2] = 5
  94. if using_copy_on_write:
  95. assert not (b[:2] == 5).any()
  96. else:
  97. assert (b[:2] == 5).all()
  98. def test_align_same_index(datetime_series, using_copy_on_write):
  99. a, b = datetime_series.align(datetime_series, copy=False)
  100. assert a.index is datetime_series.index
  101. assert b.index is datetime_series.index
  102. a, b = datetime_series.align(datetime_series, copy=True)
  103. if not using_copy_on_write:
  104. assert a.index is not datetime_series.index
  105. assert b.index is not datetime_series.index
  106. else:
  107. assert a.index is datetime_series.index
  108. assert b.index is datetime_series.index
  109. def test_align_multiindex():
  110. # GH 10665
  111. midx = pd.MultiIndex.from_product(
  112. [range(2), range(3), range(2)], names=("a", "b", "c")
  113. )
  114. idx = pd.Index(range(2), name="b")
  115. s1 = Series(np.arange(12, dtype="int64"), index=midx)
  116. s2 = Series(np.arange(2, dtype="int64"), index=idx)
  117. # these must be the same results (but flipped)
  118. res1l, res1r = s1.align(s2, join="left")
  119. res2l, res2r = s2.align(s1, join="right")
  120. expl = s1
  121. tm.assert_series_equal(expl, res1l)
  122. tm.assert_series_equal(expl, res2r)
  123. expr = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=midx)
  124. tm.assert_series_equal(expr, res1r)
  125. tm.assert_series_equal(expr, res2l)
  126. res1l, res1r = s1.align(s2, join="right")
  127. res2l, res2r = s2.align(s1, join="left")
  128. exp_idx = pd.MultiIndex.from_product(
  129. [range(2), range(2), range(2)], names=("a", "b", "c")
  130. )
  131. expl = Series([0, 1, 2, 3, 6, 7, 8, 9], index=exp_idx)
  132. tm.assert_series_equal(expl, res1l)
  133. tm.assert_series_equal(expl, res2r)
  134. expr = Series([0, 0, 1, 1] * 2, index=exp_idx)
  135. tm.assert_series_equal(expr, res1r)
  136. tm.assert_series_equal(expr, res2l)
  137. @pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
  138. def test_align_with_dataframe_method(method):
  139. # GH31788
  140. ser = Series(range(3), index=range(3))
  141. df = pd.DataFrame(0.0, index=range(3), columns=range(3))
  142. result_ser, result_df = ser.align(df, method=method)
  143. tm.assert_series_equal(result_ser, ser)
  144. tm.assert_frame_equal(result_df, df)
  145. def test_align_dt64tzindex_mismatched_tzs():
  146. idx1 = date_range("2001", periods=5, freq="H", tz="US/Eastern")
  147. ser = Series(np.random.randn(len(idx1)), index=idx1)
  148. ser_central = ser.tz_convert("US/Central")
  149. # different timezones convert to UTC
  150. new1, new2 = ser.align(ser_central)
  151. assert new1.index.tz is timezone.utc
  152. assert new2.index.tz is timezone.utc
  153. def test_align_periodindex(join_type):
  154. rng = period_range("1/1/2000", "1/1/2010", freq="A")
  155. ts = Series(np.random.randn(len(rng)), index=rng)
  156. # TODO: assert something?
  157. ts.align(ts[::2], join=join_type)
  158. def test_align_left_fewer_levels():
  159. # GH#45224
  160. left = Series([2], index=pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"]))
  161. right = Series(
  162. [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
  163. )
  164. result_left, result_right = left.align(right)
  165. expected_right = Series(
  166. [1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
  167. )
  168. expected_left = Series(
  169. [2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
  170. )
  171. tm.assert_series_equal(result_left, expected_left)
  172. tm.assert_series_equal(result_right, expected_right)
  173. def test_align_left_different_named_levels():
  174. # GH#45224
  175. left = Series(
  176. [2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
  177. )
  178. right = Series(
  179. [1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
  180. )
  181. result_left, result_right = left.align(right)
  182. expected_left = Series(
  183. [2], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
  184. )
  185. expected_right = Series(
  186. [1], index=pd.MultiIndex.from_tuples([(1, 3, 4, 2)], names=["a", "c", "d", "b"])
  187. )
  188. tm.assert_series_equal(result_left, expected_left)
  189. tm.assert_series_equal(result_right, expected_right)