# test_functions.py
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. Series,
  6. concat,
  7. merge,
  8. )
  9. import pandas._testing as tm
  10. from pandas.tests.copy_view.util import get_array
  11. def test_concat_frames(using_copy_on_write):
  12. df = DataFrame({"b": ["a"] * 3})
  13. df2 = DataFrame({"a": ["a"] * 3})
  14. df_orig = df.copy()
  15. result = concat([df, df2], axis=1)
  16. if using_copy_on_write:
  17. assert np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  18. assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  19. else:
  20. assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  21. assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  22. result.iloc[0, 0] = "d"
  23. if using_copy_on_write:
  24. assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  25. assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  26. result.iloc[0, 1] = "d"
  27. if using_copy_on_write:
  28. assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  29. tm.assert_frame_equal(df, df_orig)
  30. def test_concat_frames_updating_input(using_copy_on_write):
  31. df = DataFrame({"b": ["a"] * 3})
  32. df2 = DataFrame({"a": ["a"] * 3})
  33. result = concat([df, df2], axis=1)
  34. if using_copy_on_write:
  35. assert np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  36. assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  37. else:
  38. assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  39. assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  40. expected = result.copy()
  41. df.iloc[0, 0] = "d"
  42. if using_copy_on_write:
  43. assert not np.shares_memory(get_array(result, "b"), get_array(df, "b"))
  44. assert np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  45. df2.iloc[0, 0] = "d"
  46. if using_copy_on_write:
  47. assert not np.shares_memory(get_array(result, "a"), get_array(df2, "a"))
  48. tm.assert_frame_equal(result, expected)
  49. def test_concat_series(using_copy_on_write):
  50. ser = Series([1, 2], name="a")
  51. ser2 = Series([3, 4], name="b")
  52. ser_orig = ser.copy()
  53. ser2_orig = ser2.copy()
  54. result = concat([ser, ser2], axis=1)
  55. if using_copy_on_write:
  56. assert np.shares_memory(get_array(result, "a"), ser.values)
  57. assert np.shares_memory(get_array(result, "b"), ser2.values)
  58. else:
  59. assert not np.shares_memory(get_array(result, "a"), ser.values)
  60. assert not np.shares_memory(get_array(result, "b"), ser2.values)
  61. result.iloc[0, 0] = 100
  62. if using_copy_on_write:
  63. assert not np.shares_memory(get_array(result, "a"), ser.values)
  64. assert np.shares_memory(get_array(result, "b"), ser2.values)
  65. result.iloc[0, 1] = 1000
  66. if using_copy_on_write:
  67. assert not np.shares_memory(get_array(result, "b"), ser2.values)
  68. tm.assert_series_equal(ser, ser_orig)
  69. tm.assert_series_equal(ser2, ser2_orig)
  70. def test_concat_frames_chained(using_copy_on_write):
  71. df1 = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  72. df2 = DataFrame({"c": [4, 5, 6]})
  73. df3 = DataFrame({"d": [4, 5, 6]})
  74. result = concat([concat([df1, df2], axis=1), df3], axis=1)
  75. expected = result.copy()
  76. if using_copy_on_write:
  77. assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  78. assert np.shares_memory(get_array(result, "c"), get_array(df2, "c"))
  79. assert np.shares_memory(get_array(result, "d"), get_array(df3, "d"))
  80. else:
  81. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  82. assert not np.shares_memory(get_array(result, "c"), get_array(df2, "c"))
  83. assert not np.shares_memory(get_array(result, "d"), get_array(df3, "d"))
  84. df1.iloc[0, 0] = 100
  85. if using_copy_on_write:
  86. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  87. tm.assert_frame_equal(result, expected)
  88. def test_concat_series_chained(using_copy_on_write):
  89. ser1 = Series([1, 2, 3], name="a")
  90. ser2 = Series([4, 5, 6], name="c")
  91. ser3 = Series([4, 5, 6], name="d")
  92. result = concat([concat([ser1, ser2], axis=1), ser3], axis=1)
  93. expected = result.copy()
  94. if using_copy_on_write:
  95. assert np.shares_memory(get_array(result, "a"), get_array(ser1, "a"))
  96. assert np.shares_memory(get_array(result, "c"), get_array(ser2, "c"))
  97. assert np.shares_memory(get_array(result, "d"), get_array(ser3, "d"))
  98. else:
  99. assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a"))
  100. assert not np.shares_memory(get_array(result, "c"), get_array(ser2, "c"))
  101. assert not np.shares_memory(get_array(result, "d"), get_array(ser3, "d"))
  102. ser1.iloc[0] = 100
  103. if using_copy_on_write:
  104. assert not np.shares_memory(get_array(result, "a"), get_array(ser1, "a"))
  105. tm.assert_frame_equal(result, expected)
  106. def test_concat_series_updating_input(using_copy_on_write):
  107. ser = Series([1, 2], name="a")
  108. ser2 = Series([3, 4], name="b")
  109. expected = DataFrame({"a": [1, 2], "b": [3, 4]})
  110. result = concat([ser, ser2], axis=1)
  111. if using_copy_on_write:
  112. assert np.shares_memory(get_array(result, "a"), get_array(ser, "a"))
  113. assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b"))
  114. else:
  115. assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a"))
  116. assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b"))
  117. ser.iloc[0] = 100
  118. if using_copy_on_write:
  119. assert not np.shares_memory(get_array(result, "a"), get_array(ser, "a"))
  120. assert np.shares_memory(get_array(result, "b"), get_array(ser2, "b"))
  121. tm.assert_frame_equal(result, expected)
  122. ser2.iloc[0] = 1000
  123. if using_copy_on_write:
  124. assert not np.shares_memory(get_array(result, "b"), get_array(ser2, "b"))
  125. tm.assert_frame_equal(result, expected)
  126. def test_concat_mixed_series_frame(using_copy_on_write):
  127. df = DataFrame({"a": [1, 2, 3], "c": 1})
  128. ser = Series([4, 5, 6], name="d")
  129. result = concat([df, ser], axis=1)
  130. expected = result.copy()
  131. if using_copy_on_write:
  132. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  133. assert np.shares_memory(get_array(result, "c"), get_array(df, "c"))
  134. assert np.shares_memory(get_array(result, "d"), get_array(ser, "d"))
  135. else:
  136. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  137. assert not np.shares_memory(get_array(result, "c"), get_array(df, "c"))
  138. assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d"))
  139. ser.iloc[0] = 100
  140. if using_copy_on_write:
  141. assert not np.shares_memory(get_array(result, "d"), get_array(ser, "d"))
  142. df.iloc[0, 0] = 100
  143. if using_copy_on_write:
  144. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  145. tm.assert_frame_equal(result, expected)
  146. @pytest.mark.parametrize("copy", [True, None, False])
  147. def test_concat_copy_keyword(using_copy_on_write, copy):
  148. df = DataFrame({"a": [1, 2]})
  149. df2 = DataFrame({"b": [1.5, 2.5]})
  150. result = concat([df, df2], axis=1, copy=copy)
  151. if using_copy_on_write or copy is False:
  152. assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  153. assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
  154. else:
  155. assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  156. assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
  157. @pytest.mark.parametrize(
  158. "func",
  159. [
  160. lambda df1, df2, **kwargs: df1.merge(df2, **kwargs),
  161. lambda df1, df2, **kwargs: merge(df1, df2, **kwargs),
  162. ],
  163. )
  164. def test_merge_on_key(using_copy_on_write, func):
  165. df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
  166. df2 = DataFrame({"key": ["a", "b", "c"], "b": [4, 5, 6]})
  167. df1_orig = df1.copy()
  168. df2_orig = df2.copy()
  169. result = func(df1, df2, on="key")
  170. if using_copy_on_write:
  171. assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  172. assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  173. assert np.shares_memory(get_array(result, "key"), get_array(df1, "key"))
  174. assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key"))
  175. else:
  176. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  177. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  178. result.iloc[0, 1] = 0
  179. if using_copy_on_write:
  180. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  181. assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  182. result.iloc[0, 2] = 0
  183. if using_copy_on_write:
  184. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  185. tm.assert_frame_equal(df1, df1_orig)
  186. tm.assert_frame_equal(df2, df2_orig)
  187. def test_merge_on_index(using_copy_on_write):
  188. df1 = DataFrame({"a": [1, 2, 3]})
  189. df2 = DataFrame({"b": [4, 5, 6]})
  190. df1_orig = df1.copy()
  191. df2_orig = df2.copy()
  192. result = merge(df1, df2, left_index=True, right_index=True)
  193. if using_copy_on_write:
  194. assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  195. assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  196. else:
  197. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  198. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  199. result.iloc[0, 0] = 0
  200. if using_copy_on_write:
  201. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  202. assert np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  203. result.iloc[0, 1] = 0
  204. if using_copy_on_write:
  205. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  206. tm.assert_frame_equal(df1, df1_orig)
  207. tm.assert_frame_equal(df2, df2_orig)
  208. @pytest.mark.parametrize(
  209. "func, how",
  210. [
  211. (lambda df1, df2, **kwargs: merge(df2, df1, on="key", **kwargs), "right"),
  212. (lambda df1, df2, **kwargs: merge(df1, df2, on="key", **kwargs), "left"),
  213. ],
  214. )
  215. def test_merge_on_key_enlarging_one(using_copy_on_write, func, how):
  216. df1 = DataFrame({"key": ["a", "b", "c"], "a": [1, 2, 3]})
  217. df2 = DataFrame({"key": ["a", "b"], "b": [4, 5]})
  218. df1_orig = df1.copy()
  219. df2_orig = df2.copy()
  220. result = func(df1, df2, how=how)
  221. if using_copy_on_write:
  222. assert np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  223. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  224. assert df2._mgr._has_no_reference(1)
  225. assert df2._mgr._has_no_reference(0)
  226. assert np.shares_memory(get_array(result, "key"), get_array(df1, "key")) is (
  227. how == "left"
  228. )
  229. assert not np.shares_memory(get_array(result, "key"), get_array(df2, "key"))
  230. else:
  231. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  232. assert not np.shares_memory(get_array(result, "b"), get_array(df2, "b"))
  233. if how == "left":
  234. result.iloc[0, 1] = 0
  235. else:
  236. result.iloc[0, 2] = 0
  237. if using_copy_on_write:
  238. assert not np.shares_memory(get_array(result, "a"), get_array(df1, "a"))
  239. tm.assert_frame_equal(df1, df1_orig)
  240. tm.assert_frame_equal(df2, df2_orig)
  241. @pytest.mark.parametrize("copy", [True, None, False])
  242. def test_merge_copy_keyword(using_copy_on_write, copy):
  243. df = DataFrame({"a": [1, 2]})
  244. df2 = DataFrame({"b": [3, 4.5]})
  245. result = df.merge(df2, copy=copy, left_index=True, right_index=True)
  246. if using_copy_on_write or copy is False:
  247. assert np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  248. assert np.shares_memory(get_array(df2, "b"), get_array(result, "b"))
  249. else:
  250. assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  251. assert not np.shares_memory(get_array(df2, "b"), get_array(result, "b"))