# test_methods.py
  1. import numpy as np
  2. import pytest
  3. from pandas.errors import SettingWithCopyWarning
  4. import pandas as pd
  5. from pandas import (
  6. DataFrame,
  7. Index,
  8. MultiIndex,
  9. Period,
  10. Series,
  11. Timestamp,
  12. date_range,
  13. period_range,
  14. )
  15. import pandas._testing as tm
  16. from pandas.tests.copy_view.util import get_array
  17. def test_copy(using_copy_on_write):
  18. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  19. df_copy = df.copy()
  20. # the deep copy doesn't share memory
  21. assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  22. if using_copy_on_write:
  23. assert not df_copy._mgr.blocks[0].refs.has_reference()
  24. assert not df_copy._mgr.blocks[1].refs.has_reference()
  25. # mutating copy doesn't mutate original
  26. df_copy.iloc[0, 0] = 0
  27. assert df.iloc[0, 0] == 1
  28. def test_copy_shallow(using_copy_on_write):
  29. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  30. df_copy = df.copy(deep=False)
  31. # the shallow copy still shares memory
  32. assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  33. if using_copy_on_write:
  34. assert df_copy._mgr.blocks[0].refs.has_reference()
  35. assert df_copy._mgr.blocks[1].refs.has_reference()
  36. if using_copy_on_write:
  37. # mutating shallow copy doesn't mutate original
  38. df_copy.iloc[0, 0] = 0
  39. assert df.iloc[0, 0] == 1
  40. # mutating triggered a copy-on-write -> no longer shares memory
  41. assert not np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  42. # but still shares memory for the other columns/blocks
  43. assert np.shares_memory(get_array(df_copy, "c"), get_array(df, "c"))
  44. else:
  45. # mutating shallow copy does mutate original
  46. df_copy.iloc[0, 0] = 0
  47. assert df.iloc[0, 0] == 0
  48. # and still shares memory
  49. assert np.shares_memory(get_array(df_copy, "a"), get_array(df, "a"))
  50. @pytest.mark.parametrize("copy", [True, None, False])
  51. @pytest.mark.parametrize(
  52. "method",
  53. [
  54. lambda df, copy: df.rename(columns=str.lower, copy=copy),
  55. lambda df, copy: df.reindex(columns=["a", "c"], copy=copy),
  56. lambda df, copy: df.reindex_like(df, copy=copy),
  57. lambda df, copy: df.align(df, copy=copy)[0],
  58. lambda df, copy: df.set_axis(["a", "b", "c"], axis="index", copy=copy),
  59. lambda df, copy: df.rename_axis(index="test", copy=copy),
  60. lambda df, copy: df.rename_axis(columns="test", copy=copy),
  61. lambda df, copy: df.astype({"b": "int64"}, copy=copy),
  62. # lambda df, copy: df.swaplevel(0, 0, copy=copy),
  63. lambda df, copy: df.swapaxes(0, 0, copy=copy),
  64. lambda df, copy: df.truncate(0, 5, copy=copy),
  65. lambda df, copy: df.infer_objects(copy=copy),
  66. lambda df, copy: df.to_timestamp(copy=copy),
  67. lambda df, copy: df.to_period(freq="D", copy=copy),
  68. lambda df, copy: df.tz_localize("US/Central", copy=copy),
  69. lambda df, copy: df.tz_convert("US/Central", copy=copy),
  70. lambda df, copy: df.set_flags(allows_duplicate_labels=False, copy=copy),
  71. ],
  72. ids=[
  73. "rename",
  74. "reindex",
  75. "reindex_like",
  76. "align",
  77. "set_axis",
  78. "rename_axis0",
  79. "rename_axis1",
  80. "astype",
  81. # "swaplevel", # only series
  82. "swapaxes",
  83. "truncate",
  84. "infer_objects",
  85. "to_timestamp",
  86. "to_period",
  87. "tz_localize",
  88. "tz_convert",
  89. "set_flags",
  90. ],
  91. )
  92. def test_methods_copy_keyword(
  93. request, method, copy, using_copy_on_write, using_array_manager
  94. ):
  95. index = None
  96. if "to_timestamp" in request.node.callspec.id:
  97. index = period_range("2012-01-01", freq="D", periods=3)
  98. elif "to_period" in request.node.callspec.id:
  99. index = date_range("2012-01-01", freq="D", periods=3)
  100. elif "tz_localize" in request.node.callspec.id:
  101. index = date_range("2012-01-01", freq="D", periods=3)
  102. elif "tz_convert" in request.node.callspec.id:
  103. index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
  104. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=index)
  105. df2 = method(df, copy=copy)
  106. share_memory = using_copy_on_write or copy is False
  107. if request.node.callspec.id.startswith("reindex-"):
  108. # TODO copy=False without CoW still returns a copy in this case
  109. if not using_copy_on_write and not using_array_manager and copy is False:
  110. share_memory = False
  111. if share_memory:
  112. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  113. else:
  114. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  115. @pytest.mark.parametrize("copy", [True, None, False])
  116. @pytest.mark.parametrize(
  117. "method",
  118. [
  119. lambda ser, copy: ser.rename(index={0: 100}, copy=copy),
  120. lambda ser, copy: ser.rename(None, copy=copy),
  121. lambda ser, copy: ser.reindex(index=ser.index, copy=copy),
  122. lambda ser, copy: ser.reindex_like(ser, copy=copy),
  123. lambda ser, copy: ser.align(ser, copy=copy)[0],
  124. lambda ser, copy: ser.set_axis(["a", "b", "c"], axis="index", copy=copy),
  125. lambda ser, copy: ser.rename_axis(index="test", copy=copy),
  126. lambda ser, copy: ser.astype("int64", copy=copy),
  127. lambda ser, copy: ser.swaplevel(0, 1, copy=copy),
  128. lambda ser, copy: ser.swapaxes(0, 0, copy=copy),
  129. lambda ser, copy: ser.truncate(0, 5, copy=copy),
  130. lambda ser, copy: ser.infer_objects(copy=copy),
  131. lambda ser, copy: ser.to_timestamp(copy=copy),
  132. lambda ser, copy: ser.to_period(freq="D", copy=copy),
  133. lambda ser, copy: ser.tz_localize("US/Central", copy=copy),
  134. lambda ser, copy: ser.tz_convert("US/Central", copy=copy),
  135. lambda ser, copy: ser.set_flags(allows_duplicate_labels=False, copy=copy),
  136. ],
  137. ids=[
  138. "rename (dict)",
  139. "rename",
  140. "reindex",
  141. "reindex_like",
  142. "align",
  143. "set_axis",
  144. "rename_axis0",
  145. "astype",
  146. "swaplevel",
  147. "swapaxes",
  148. "truncate",
  149. "infer_objects",
  150. "to_timestamp",
  151. "to_period",
  152. "tz_localize",
  153. "tz_convert",
  154. "set_flags",
  155. ],
  156. )
  157. def test_methods_series_copy_keyword(request, method, copy, using_copy_on_write):
  158. index = None
  159. if "to_timestamp" in request.node.callspec.id:
  160. index = period_range("2012-01-01", freq="D", periods=3)
  161. elif "to_period" in request.node.callspec.id:
  162. index = date_range("2012-01-01", freq="D", periods=3)
  163. elif "tz_localize" in request.node.callspec.id:
  164. index = date_range("2012-01-01", freq="D", periods=3)
  165. elif "tz_convert" in request.node.callspec.id:
  166. index = date_range("2012-01-01", freq="D", periods=3, tz="Europe/Brussels")
  167. elif "swaplevel" in request.node.callspec.id:
  168. index = MultiIndex.from_arrays([[1, 2, 3], [4, 5, 6]])
  169. ser = Series([1, 2, 3], index=index)
  170. ser2 = method(ser, copy=copy)
  171. share_memory = using_copy_on_write or copy is False
  172. if share_memory:
  173. assert np.shares_memory(get_array(ser2), get_array(ser))
  174. else:
  175. assert not np.shares_memory(get_array(ser2), get_array(ser))
  176. @pytest.mark.parametrize("copy", [True, None, False])
  177. def test_transpose_copy_keyword(using_copy_on_write, copy, using_array_manager):
  178. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  179. result = df.transpose(copy=copy)
  180. share_memory = using_copy_on_write or copy is False or copy is None
  181. share_memory = share_memory and not using_array_manager
  182. if share_memory:
  183. assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
  184. else:
  185. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
# -----------------------------------------------------------------------------
# DataFrame methods returning a new DataFrame using a shallow copy
  188. def test_reset_index(using_copy_on_write):
  189. # Case: resetting the index (i.e. adding a new column) + mutating the
  190. # resulting dataframe
  191. df = DataFrame(
  192. {"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}, index=[10, 11, 12]
  193. )
  194. df_orig = df.copy()
  195. df2 = df.reset_index()
  196. df2._mgr._verify_integrity()
  197. if using_copy_on_write:
  198. # still shares memory (df2 is a shallow copy)
  199. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  200. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  201. # mutating df2 triggers a copy-on-write for that column / block
  202. df2.iloc[0, 2] = 0
  203. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  204. if using_copy_on_write:
  205. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  206. tm.assert_frame_equal(df, df_orig)
  207. @pytest.mark.parametrize("index", [pd.RangeIndex(0, 2), Index([1, 2])])
  208. def test_reset_index_series_drop(using_copy_on_write, index):
  209. ser = Series([1, 2], index=index)
  210. ser_orig = ser.copy()
  211. ser2 = ser.reset_index(drop=True)
  212. if using_copy_on_write:
  213. assert np.shares_memory(get_array(ser), get_array(ser2))
  214. assert not ser._mgr._has_no_reference(0)
  215. else:
  216. assert not np.shares_memory(get_array(ser), get_array(ser2))
  217. ser2.iloc[0] = 100
  218. tm.assert_series_equal(ser, ser_orig)
  219. def test_rename_columns(using_copy_on_write):
  220. # Case: renaming columns returns a new dataframe
  221. # + afterwards modifying the result
  222. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  223. df_orig = df.copy()
  224. df2 = df.rename(columns=str.upper)
  225. if using_copy_on_write:
  226. assert np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  227. df2.iloc[0, 0] = 0
  228. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  229. if using_copy_on_write:
  230. assert np.shares_memory(get_array(df2, "C"), get_array(df, "c"))
  231. expected = DataFrame({"A": [0, 2, 3], "B": [4, 5, 6], "C": [0.1, 0.2, 0.3]})
  232. tm.assert_frame_equal(df2, expected)
  233. tm.assert_frame_equal(df, df_orig)
  234. def test_rename_columns_modify_parent(using_copy_on_write):
  235. # Case: renaming columns returns a new dataframe
  236. # + afterwards modifying the original (parent) dataframe
  237. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  238. df2 = df.rename(columns=str.upper)
  239. df2_orig = df2.copy()
  240. if using_copy_on_write:
  241. assert np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  242. else:
  243. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  244. df.iloc[0, 0] = 0
  245. assert not np.shares_memory(get_array(df2, "A"), get_array(df, "a"))
  246. if using_copy_on_write:
  247. assert np.shares_memory(get_array(df2, "C"), get_array(df, "c"))
  248. expected = DataFrame({"a": [0, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  249. tm.assert_frame_equal(df, expected)
  250. tm.assert_frame_equal(df2, df2_orig)
  251. def test_pipe(using_copy_on_write):
  252. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  253. df_orig = df.copy()
  254. def testfunc(df):
  255. return df
  256. df2 = df.pipe(testfunc)
  257. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  258. # mutating df2 triggers a copy-on-write for that column
  259. df2.iloc[0, 0] = 0
  260. if using_copy_on_write:
  261. tm.assert_frame_equal(df, df_orig)
  262. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  263. else:
  264. expected = DataFrame({"a": [0, 2, 3], "b": 1.5})
  265. tm.assert_frame_equal(df, expected)
  266. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  267. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  268. def test_pipe_modify_df(using_copy_on_write):
  269. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  270. df_orig = df.copy()
  271. def testfunc(df):
  272. df.iloc[0, 0] = 100
  273. return df
  274. df2 = df.pipe(testfunc)
  275. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  276. if using_copy_on_write:
  277. tm.assert_frame_equal(df, df_orig)
  278. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  279. else:
  280. expected = DataFrame({"a": [100, 2, 3], "b": 1.5})
  281. tm.assert_frame_equal(df, expected)
  282. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  283. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  284. def test_reindex_columns(using_copy_on_write):
  285. # Case: reindexing the column returns a new dataframe
  286. # + afterwards modifying the result
  287. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  288. df_orig = df.copy()
  289. df2 = df.reindex(columns=["a", "c"])
  290. if using_copy_on_write:
  291. # still shares memory (df2 is a shallow copy)
  292. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  293. else:
  294. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  295. # mutating df2 triggers a copy-on-write for that column
  296. df2.iloc[0, 0] = 0
  297. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  298. if using_copy_on_write:
  299. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  300. tm.assert_frame_equal(df, df_orig)
  301. def test_drop_on_column(using_copy_on_write):
  302. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  303. df_orig = df.copy()
  304. df2 = df.drop(columns="a")
  305. df2._mgr._verify_integrity()
  306. if using_copy_on_write:
  307. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  308. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  309. else:
  310. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  311. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  312. df2.iloc[0, 0] = 0
  313. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  314. if using_copy_on_write:
  315. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  316. tm.assert_frame_equal(df, df_orig)
  317. def test_select_dtypes(using_copy_on_write):
  318. # Case: selecting columns using `select_dtypes()` returns a new dataframe
  319. # + afterwards modifying the result
  320. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  321. df_orig = df.copy()
  322. df2 = df.select_dtypes("int64")
  323. df2._mgr._verify_integrity()
  324. if using_copy_on_write:
  325. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  326. else:
  327. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  328. # mutating df2 triggers a copy-on-write for that column/block
  329. df2.iloc[0, 0] = 0
  330. if using_copy_on_write:
  331. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  332. tm.assert_frame_equal(df, df_orig)
  333. @pytest.mark.parametrize(
  334. "filter_kwargs", [{"items": ["a"]}, {"like": "a"}, {"regex": "a"}]
  335. )
  336. def test_filter(using_copy_on_write, filter_kwargs):
  337. # Case: selecting columns using `filter()` returns a new dataframe
  338. # + afterwards modifying the result
  339. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  340. df_orig = df.copy()
  341. df2 = df.filter(**filter_kwargs)
  342. if using_copy_on_write:
  343. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  344. else:
  345. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  346. # mutating df2 triggers a copy-on-write for that column/block
  347. if using_copy_on_write:
  348. df2.iloc[0, 0] = 0
  349. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  350. tm.assert_frame_equal(df, df_orig)
  351. def test_shift_no_op(using_copy_on_write):
  352. df = DataFrame(
  353. [[1, 2], [3, 4], [5, 6]],
  354. index=date_range("2020-01-01", "2020-01-03"),
  355. columns=["a", "b"],
  356. )
  357. df_orig = df.copy()
  358. df2 = df.shift(periods=0)
  359. if using_copy_on_write:
  360. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  361. else:
  362. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  363. df.iloc[0, 0] = 0
  364. if using_copy_on_write:
  365. assert not np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  366. tm.assert_frame_equal(df2, df_orig)
  367. def test_shift_index(using_copy_on_write):
  368. df = DataFrame(
  369. [[1, 2], [3, 4], [5, 6]],
  370. index=date_range("2020-01-01", "2020-01-03"),
  371. columns=["a", "b"],
  372. )
  373. df2 = df.shift(periods=1, axis=0)
  374. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  375. def test_shift_rows_freq(using_copy_on_write):
  376. df = DataFrame(
  377. [[1, 2], [3, 4], [5, 6]],
  378. index=date_range("2020-01-01", "2020-01-03"),
  379. columns=["a", "b"],
  380. )
  381. df_orig = df.copy()
  382. df_orig.index = date_range("2020-01-02", "2020-01-04")
  383. df2 = df.shift(periods=1, freq="1D")
  384. if using_copy_on_write:
  385. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  386. else:
  387. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  388. df.iloc[0, 0] = 0
  389. if using_copy_on_write:
  390. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  391. tm.assert_frame_equal(df2, df_orig)
  392. def test_shift_columns(using_copy_on_write):
  393. df = DataFrame(
  394. [[1, 2], [3, 4], [5, 6]], columns=date_range("2020-01-01", "2020-01-02")
  395. )
  396. df2 = df.shift(periods=1, axis=1)
  397. assert np.shares_memory(get_array(df2, "2020-01-02"), get_array(df, "2020-01-01"))
  398. df.iloc[0, 1] = 0
  399. if using_copy_on_write:
  400. assert not np.shares_memory(
  401. get_array(df2, "2020-01-02"), get_array(df, "2020-01-01")
  402. )
  403. expected = DataFrame(
  404. [[np.nan, 1], [np.nan, 3], [np.nan, 5]],
  405. columns=date_range("2020-01-01", "2020-01-02"),
  406. )
  407. tm.assert_frame_equal(df2, expected)
  408. def test_pop(using_copy_on_write):
  409. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  410. df_orig = df.copy()
  411. view_original = df[:]
  412. result = df.pop("a")
  413. assert np.shares_memory(result.values, get_array(view_original, "a"))
  414. assert np.shares_memory(get_array(df, "b"), get_array(view_original, "b"))
  415. if using_copy_on_write:
  416. result.iloc[0] = 0
  417. assert not np.shares_memory(result.values, get_array(view_original, "a"))
  418. df.iloc[0, 0] = 0
  419. if using_copy_on_write:
  420. assert not np.shares_memory(get_array(df, "b"), get_array(view_original, "b"))
  421. tm.assert_frame_equal(view_original, df_orig)
  422. else:
  423. expected = DataFrame({"a": [1, 2, 3], "b": [0, 5, 6], "c": [0.1, 0.2, 0.3]})
  424. tm.assert_frame_equal(view_original, expected)
  425. @pytest.mark.parametrize(
  426. "func",
  427. [
  428. lambda x, y: x.align(y),
  429. lambda x, y: x.align(y.a, axis=0),
  430. lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1),
  431. ],
  432. )
  433. def test_align_frame(using_copy_on_write, func):
  434. df = DataFrame({"a": [1, 2, 3], "b": "a"})
  435. df_orig = df.copy()
  436. df_changed = df[["b", "a"]].copy()
  437. df2, _ = func(df, df_changed)
  438. if using_copy_on_write:
  439. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  440. else:
  441. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  442. df2.iloc[0, 0] = 0
  443. if using_copy_on_write:
  444. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  445. tm.assert_frame_equal(df, df_orig)
  446. def test_align_series(using_copy_on_write):
  447. ser = Series([1, 2])
  448. ser_orig = ser.copy()
  449. ser_other = ser.copy()
  450. ser2, ser_other_result = ser.align(ser_other)
  451. if using_copy_on_write:
  452. assert np.shares_memory(ser2.values, ser.values)
  453. assert np.shares_memory(ser_other_result.values, ser_other.values)
  454. else:
  455. assert not np.shares_memory(ser2.values, ser.values)
  456. assert not np.shares_memory(ser_other_result.values, ser_other.values)
  457. ser2.iloc[0] = 0
  458. ser_other_result.iloc[0] = 0
  459. if using_copy_on_write:
  460. assert not np.shares_memory(ser2.values, ser.values)
  461. assert not np.shares_memory(ser_other_result.values, ser_other.values)
  462. tm.assert_series_equal(ser, ser_orig)
  463. tm.assert_series_equal(ser_other, ser_orig)
  464. def test_align_copy_false(using_copy_on_write):
  465. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  466. df_orig = df.copy()
  467. df2, df3 = df.align(df, copy=False)
  468. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  469. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  470. if using_copy_on_write:
  471. df2.loc[0, "a"] = 0
  472. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  473. df3.loc[0, "a"] = 0
  474. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  475. def test_align_with_series_copy_false(using_copy_on_write):
  476. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  477. ser = Series([1, 2, 3], name="x")
  478. ser_orig = ser.copy()
  479. df_orig = df.copy()
  480. df2, ser2 = df.align(ser, copy=False, axis=0)
  481. assert np.shares_memory(get_array(df, "b"), get_array(df2, "b"))
  482. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  483. assert np.shares_memory(get_array(ser, "x"), get_array(ser2, "x"))
  484. if using_copy_on_write:
  485. df2.loc[0, "a"] = 0
  486. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  487. ser2.loc[0] = 0
  488. tm.assert_series_equal(ser, ser_orig) # Original is unchanged
  489. def test_to_frame(using_copy_on_write):
  490. # Case: converting a Series to a DataFrame with to_frame
  491. ser = Series([1, 2, 3])
  492. ser_orig = ser.copy()
  493. df = ser[:].to_frame()
  494. # currently this always returns a "view"
  495. assert np.shares_memory(ser.values, get_array(df, 0))
  496. df.iloc[0, 0] = 0
  497. if using_copy_on_write:
  498. # mutating df triggers a copy-on-write for that column
  499. assert not np.shares_memory(ser.values, get_array(df, 0))
  500. tm.assert_series_equal(ser, ser_orig)
  501. else:
  502. # but currently select_dtypes() actually returns a view -> mutates parent
  503. expected = ser_orig.copy()
  504. expected.iloc[0] = 0
  505. tm.assert_series_equal(ser, expected)
  506. # modify original series -> don't modify dataframe
  507. df = ser[:].to_frame()
  508. ser.iloc[0] = 0
  509. if using_copy_on_write:
  510. tm.assert_frame_equal(df, ser_orig.to_frame())
  511. else:
  512. expected = ser_orig.copy().to_frame()
  513. expected.iloc[0, 0] = 0
  514. tm.assert_frame_equal(df, expected)
  515. @pytest.mark.parametrize("ax", ["index", "columns"])
  516. def test_swapaxes_noop(using_copy_on_write, ax):
  517. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  518. df_orig = df.copy()
  519. df2 = df.swapaxes(ax, ax)
  520. if using_copy_on_write:
  521. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  522. else:
  523. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  524. # mutating df2 triggers a copy-on-write for that column/block
  525. df2.iloc[0, 0] = 0
  526. if using_copy_on_write:
  527. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  528. tm.assert_frame_equal(df, df_orig)
  529. def test_swapaxes_single_block(using_copy_on_write):
  530. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}, index=["x", "y", "z"])
  531. df_orig = df.copy()
  532. df2 = df.swapaxes("index", "columns")
  533. if using_copy_on_write:
  534. assert np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  535. else:
  536. assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  537. # mutating df2 triggers a copy-on-write for that column/block
  538. df2.iloc[0, 0] = 0
  539. if using_copy_on_write:
  540. assert not np.shares_memory(get_array(df2, "x"), get_array(df, "a"))
  541. tm.assert_frame_equal(df, df_orig)
  542. def test_swapaxes_read_only_array():
  543. df = DataFrame({"a": [1, 2], "b": 3})
  544. df = df.swapaxes(axis1="index", axis2="columns")
  545. df.iloc[0, 0] = 100
  546. expected = DataFrame({0: [100, 3], 1: [2, 3]}, index=["a", "b"])
  547. tm.assert_frame_equal(df, expected)
  548. @pytest.mark.parametrize(
  549. "method, idx",
  550. [
  551. (lambda df: df.copy(deep=False).copy(deep=False), 0),
  552. (lambda df: df.reset_index().reset_index(), 2),
  553. (lambda df: df.rename(columns=str.upper).rename(columns=str.lower), 0),
  554. (lambda df: df.copy(deep=False).select_dtypes(include="number"), 0),
  555. ],
  556. ids=["shallow-copy", "reset_index", "rename", "select_dtypes"],
  557. )
  558. def test_chained_methods(request, method, idx, using_copy_on_write):
  559. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  560. df_orig = df.copy()
  561. # when not using CoW, only the copy() variant actually gives a view
  562. df2_is_view = not using_copy_on_write and request.node.callspec.id == "shallow-copy"
  563. # modify df2 -> don't modify df
  564. df2 = method(df)
  565. df2.iloc[0, idx] = 0
  566. if not df2_is_view:
  567. tm.assert_frame_equal(df, df_orig)
  568. # modify df -> don't modify df2
  569. df2 = method(df)
  570. df.iloc[0, 0] = 0
  571. if not df2_is_view:
  572. tm.assert_frame_equal(df2.iloc[:, idx:], df_orig)
  573. @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
  574. def test_to_timestamp(using_copy_on_write, obj):
  575. obj.index = Index([Period("2012-1-1", freq="D"), Period("2012-1-2", freq="D")])
  576. obj_orig = obj.copy()
  577. obj2 = obj.to_timestamp()
  578. if using_copy_on_write:
  579. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  580. else:
  581. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  582. # mutating obj2 triggers a copy-on-write for that column / block
  583. obj2.iloc[0] = 0
  584. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  585. tm.assert_equal(obj, obj_orig)
  586. @pytest.mark.parametrize("obj", [Series([1, 2], name="a"), DataFrame({"a": [1, 2]})])
  587. def test_to_period(using_copy_on_write, obj):
  588. obj.index = Index([Timestamp("2019-12-31"), Timestamp("2020-12-31")])
  589. obj_orig = obj.copy()
  590. obj2 = obj.to_period(freq="Y")
  591. if using_copy_on_write:
  592. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  593. else:
  594. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  595. # mutating obj2 triggers a copy-on-write for that column / block
  596. obj2.iloc[0] = 0
  597. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  598. tm.assert_equal(obj, obj_orig)
  599. def test_set_index(using_copy_on_write):
  600. # GH 49473
  601. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  602. df_orig = df.copy()
  603. df2 = df.set_index("a")
  604. if using_copy_on_write:
  605. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  606. else:
  607. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  608. # mutating df2 triggers a copy-on-write for that column / block
  609. df2.iloc[0, 1] = 0
  610. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  611. tm.assert_frame_equal(df, df_orig)
  612. def test_set_index_mutating_parent_does_not_mutate_index():
  613. df = DataFrame({"a": [1, 2, 3], "b": 1})
  614. result = df.set_index("a")
  615. expected = result.copy()
  616. df.iloc[0, 0] = 100
  617. tm.assert_frame_equal(result, expected)
  618. def test_add_prefix(using_copy_on_write):
  619. # GH 49473
  620. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  621. df_orig = df.copy()
  622. df2 = df.add_prefix("CoW_")
  623. if using_copy_on_write:
  624. assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a"))
  625. df2.iloc[0, 0] = 0
  626. assert not np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a"))
  627. if using_copy_on_write:
  628. assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c"))
  629. expected = DataFrame(
  630. {"CoW_a": [0, 2, 3], "CoW_b": [4, 5, 6], "CoW_c": [0.1, 0.2, 0.3]}
  631. )
  632. tm.assert_frame_equal(df2, expected)
  633. tm.assert_frame_equal(df, df_orig)
  634. def test_add_suffix(using_copy_on_write):
  635. # GH 49473
  636. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  637. df_orig = df.copy()
  638. df2 = df.add_suffix("_CoW")
  639. if using_copy_on_write:
  640. assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a"))
  641. df2.iloc[0, 0] = 0
  642. assert not np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a"))
  643. if using_copy_on_write:
  644. assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c"))
  645. expected = DataFrame(
  646. {"a_CoW": [0, 2, 3], "b_CoW": [4, 5, 6], "c_CoW": [0.1, 0.2, 0.3]}
  647. )
  648. tm.assert_frame_equal(df2, expected)
  649. tm.assert_frame_equal(df, df_orig)
  650. @pytest.mark.parametrize("axis, val", [(0, 5.5), (1, np.nan)])
  651. def test_dropna(using_copy_on_write, axis, val):
  652. df = DataFrame({"a": [1, 2, 3], "b": [4, val, 6], "c": "d"})
  653. df_orig = df.copy()
  654. df2 = df.dropna(axis=axis)
  655. if using_copy_on_write:
  656. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  657. else:
  658. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  659. df2.iloc[0, 0] = 0
  660. if using_copy_on_write:
  661. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  662. tm.assert_frame_equal(df, df_orig)
  663. @pytest.mark.parametrize("val", [5, 5.5])
  664. def test_dropna_series(using_copy_on_write, val):
  665. ser = Series([1, val, 4])
  666. ser_orig = ser.copy()
  667. ser2 = ser.dropna()
  668. if using_copy_on_write:
  669. assert np.shares_memory(ser2.values, ser.values)
  670. else:
  671. assert not np.shares_memory(ser2.values, ser.values)
  672. ser2.iloc[0] = 0
  673. if using_copy_on_write:
  674. assert not np.shares_memory(ser2.values, ser.values)
  675. tm.assert_series_equal(ser, ser_orig)
  676. @pytest.mark.parametrize(
  677. "method",
  678. [
  679. lambda df: df.head(),
  680. lambda df: df.head(2),
  681. lambda df: df.tail(),
  682. lambda df: df.tail(3),
  683. ],
  684. )
  685. def test_head_tail(method, using_copy_on_write):
  686. df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
  687. df_orig = df.copy()
  688. df2 = method(df)
  689. df2._mgr._verify_integrity()
  690. if using_copy_on_write:
  691. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  692. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  693. # modify df2 to trigger CoW for that block
  694. df2.iloc[0, 0] = 0
  695. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  696. if using_copy_on_write:
  697. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  698. else:
  699. # without CoW enabled, head and tail return views. Mutating df2 also mutates df.
  700. df2.iloc[0, 0] = 1
  701. tm.assert_frame_equal(df, df_orig)
  702. def test_infer_objects(using_copy_on_write):
  703. df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"})
  704. df_orig = df.copy()
  705. df2 = df.infer_objects()
  706. if using_copy_on_write:
  707. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  708. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  709. else:
  710. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  711. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  712. df2.iloc[0, 0] = 0
  713. df2.iloc[0, 1] = "d"
  714. if using_copy_on_write:
  715. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  716. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  717. tm.assert_frame_equal(df, df_orig)
  718. def test_infer_objects_no_reference(using_copy_on_write):
  719. df = DataFrame(
  720. {
  721. "a": [1, 2],
  722. "b": "c",
  723. "c": 1,
  724. "d": Series(
  725. [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object"
  726. ),
  727. "e": "b",
  728. }
  729. )
  730. df = df.infer_objects()
  731. arr_a = get_array(df, "a")
  732. arr_b = get_array(df, "b")
  733. arr_d = get_array(df, "d")
  734. df.iloc[0, 0] = 0
  735. df.iloc[0, 1] = "d"
  736. df.iloc[0, 3] = Timestamp("2018-12-31")
  737. if using_copy_on_write:
  738. assert np.shares_memory(arr_a, get_array(df, "a"))
  739. # TODO(CoW): Block splitting causes references here
  740. assert not np.shares_memory(arr_b, get_array(df, "b"))
  741. assert np.shares_memory(arr_d, get_array(df, "d"))
  742. def test_infer_objects_reference(using_copy_on_write):
  743. df = DataFrame(
  744. {
  745. "a": [1, 2],
  746. "b": "c",
  747. "c": 1,
  748. "d": Series(
  749. [Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype="object"
  750. ),
  751. }
  752. )
  753. view = df[:] # noqa: F841
  754. df = df.infer_objects()
  755. arr_a = get_array(df, "a")
  756. arr_b = get_array(df, "b")
  757. arr_d = get_array(df, "d")
  758. df.iloc[0, 0] = 0
  759. df.iloc[0, 1] = "d"
  760. df.iloc[0, 3] = Timestamp("2018-12-31")
  761. if using_copy_on_write:
  762. assert not np.shares_memory(arr_a, get_array(df, "a"))
  763. assert not np.shares_memory(arr_b, get_array(df, "b"))
  764. assert np.shares_memory(arr_d, get_array(df, "d"))
  765. @pytest.mark.parametrize(
  766. "kwargs",
  767. [
  768. {"before": "a", "after": "b", "axis": 1},
  769. {"before": 0, "after": 1, "axis": 0},
  770. ],
  771. )
  772. def test_truncate(using_copy_on_write, kwargs):
  773. df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 2})
  774. df_orig = df.copy()
  775. df2 = df.truncate(**kwargs)
  776. df2._mgr._verify_integrity()
  777. if using_copy_on_write:
  778. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  779. else:
  780. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  781. df2.iloc[0, 0] = 0
  782. if using_copy_on_write:
  783. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  784. tm.assert_frame_equal(df, df_orig)
  785. @pytest.mark.parametrize("method", ["assign", "drop_duplicates"])
  786. def test_assign_drop_duplicates(using_copy_on_write, method):
  787. df = DataFrame({"a": [1, 2, 3]})
  788. df_orig = df.copy()
  789. df2 = getattr(df, method)()
  790. df2._mgr._verify_integrity()
  791. if using_copy_on_write:
  792. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  793. else:
  794. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  795. df2.iloc[0, 0] = 0
  796. if using_copy_on_write:
  797. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  798. tm.assert_frame_equal(df, df_orig)
  799. @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
  800. def test_take(using_copy_on_write, obj):
  801. # Check that no copy is made when we take all rows in original order
  802. obj_orig = obj.copy()
  803. obj2 = obj.take([0, 1])
  804. if using_copy_on_write:
  805. assert np.shares_memory(obj2.values, obj.values)
  806. else:
  807. assert not np.shares_memory(obj2.values, obj.values)
  808. obj2.iloc[0] = 0
  809. if using_copy_on_write:
  810. assert not np.shares_memory(obj2.values, obj.values)
  811. tm.assert_equal(obj, obj_orig)
  812. @pytest.mark.parametrize("obj", [Series([1, 2]), DataFrame({"a": [1, 2]})])
  813. def test_between_time(using_copy_on_write, obj):
  814. obj.index = date_range("2018-04-09", periods=2, freq="1D20min")
  815. obj_orig = obj.copy()
  816. obj2 = obj.between_time("0:00", "1:00")
  817. if using_copy_on_write:
  818. assert np.shares_memory(obj2.values, obj.values)
  819. else:
  820. assert not np.shares_memory(obj2.values, obj.values)
  821. obj2.iloc[0] = 0
  822. if using_copy_on_write:
  823. assert not np.shares_memory(obj2.values, obj.values)
  824. tm.assert_equal(obj, obj_orig)
  825. def test_reindex_like(using_copy_on_write):
  826. df = DataFrame({"a": [1, 2], "b": "a"})
  827. other = DataFrame({"b": "a", "a": [1, 2]})
  828. df_orig = df.copy()
  829. df2 = df.reindex_like(other)
  830. if using_copy_on_write:
  831. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  832. else:
  833. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  834. df2.iloc[0, 1] = 0
  835. if using_copy_on_write:
  836. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  837. tm.assert_frame_equal(df, df_orig)
  838. def test_sort_index(using_copy_on_write):
  839. # GH 49473
  840. ser = Series([1, 2, 3])
  841. ser_orig = ser.copy()
  842. ser2 = ser.sort_index()
  843. if using_copy_on_write:
  844. assert np.shares_memory(ser.values, ser2.values)
  845. else:
  846. assert not np.shares_memory(ser.values, ser2.values)
  847. # mutating ser triggers a copy-on-write for the column / block
  848. ser2.iloc[0] = 0
  849. assert not np.shares_memory(ser2.values, ser.values)
  850. tm.assert_series_equal(ser, ser_orig)
  851. @pytest.mark.parametrize(
  852. "obj, kwargs",
  853. [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
  854. )
  855. def test_sort_values(using_copy_on_write, obj, kwargs):
  856. obj_orig = obj.copy()
  857. obj2 = obj.sort_values(**kwargs)
  858. if using_copy_on_write:
  859. assert np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  860. else:
  861. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  862. # mutating df triggers a copy-on-write for the column / block
  863. obj2.iloc[0] = 0
  864. assert not np.shares_memory(get_array(obj2, "a"), get_array(obj, "a"))
  865. tm.assert_equal(obj, obj_orig)
  866. @pytest.mark.parametrize(
  867. "obj, kwargs",
  868. [(Series([1, 2, 3], name="a"), {}), (DataFrame({"a": [1, 2, 3]}), {"by": "a"})],
  869. )
  870. def test_sort_values_inplace(using_copy_on_write, obj, kwargs, using_array_manager):
  871. obj_orig = obj.copy()
  872. view = obj[:]
  873. obj.sort_values(inplace=True, **kwargs)
  874. assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  875. # mutating obj triggers a copy-on-write for the column / block
  876. obj.iloc[0] = 0
  877. if using_copy_on_write:
  878. assert not np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  879. tm.assert_equal(view, obj_orig)
  880. else:
  881. assert np.shares_memory(get_array(obj, "a"), get_array(view, "a"))
  882. @pytest.mark.parametrize("decimals", [-1, 0, 1])
  883. def test_round(using_copy_on_write, decimals):
  884. df = DataFrame({"a": [1, 2], "b": "c"})
  885. df_orig = df.copy()
  886. df2 = df.round(decimals=decimals)
  887. if using_copy_on_write:
  888. assert np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  889. # TODO: Make inplace by using out parameter of ndarray.round?
  890. if decimals >= 0:
  891. # Ensure lazy copy if no-op
  892. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  893. else:
  894. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  895. else:
  896. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  897. df2.iloc[0, 1] = "d"
  898. df2.iloc[0, 0] = 4
  899. if using_copy_on_write:
  900. assert not np.shares_memory(get_array(df2, "b"), get_array(df, "b"))
  901. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  902. tm.assert_frame_equal(df, df_orig)
  903. def test_reorder_levels(using_copy_on_write):
  904. index = MultiIndex.from_tuples(
  905. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]
  906. )
  907. df = DataFrame({"a": [1, 2, 3, 4]}, index=index)
  908. df_orig = df.copy()
  909. df2 = df.reorder_levels(order=["two", "one"])
  910. if using_copy_on_write:
  911. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  912. else:
  913. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  914. df2.iloc[0, 0] = 0
  915. if using_copy_on_write:
  916. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  917. tm.assert_frame_equal(df, df_orig)
  918. def test_series_reorder_levels(using_copy_on_write):
  919. index = MultiIndex.from_tuples(
  920. [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"]
  921. )
  922. ser = Series([1, 2, 3, 4], index=index)
  923. ser_orig = ser.copy()
  924. ser2 = ser.reorder_levels(order=["two", "one"])
  925. if using_copy_on_write:
  926. assert np.shares_memory(ser2.values, ser.values)
  927. else:
  928. assert not np.shares_memory(ser2.values, ser.values)
  929. ser2.iloc[0] = 0
  930. if using_copy_on_write:
  931. assert not np.shares_memory(ser2.values, ser.values)
  932. tm.assert_series_equal(ser, ser_orig)
  933. @pytest.mark.parametrize("obj", [Series([1, 2, 3]), DataFrame({"a": [1, 2, 3]})])
  934. def test_swaplevel(using_copy_on_write, obj):
  935. index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
  936. obj.index = index
  937. obj_orig = obj.copy()
  938. obj2 = obj.swaplevel()
  939. if using_copy_on_write:
  940. assert np.shares_memory(obj2.values, obj.values)
  941. else:
  942. assert not np.shares_memory(obj2.values, obj.values)
  943. obj2.iloc[0] = 0
  944. if using_copy_on_write:
  945. assert not np.shares_memory(obj2.values, obj.values)
  946. tm.assert_equal(obj, obj_orig)
  947. def test_frame_set_axis(using_copy_on_write):
  948. # GH 49473
  949. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]})
  950. df_orig = df.copy()
  951. df2 = df.set_axis(["a", "b", "c"], axis="index")
  952. if using_copy_on_write:
  953. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  954. else:
  955. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  956. # mutating df2 triggers a copy-on-write for that column / block
  957. df2.iloc[0, 0] = 0
  958. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  959. tm.assert_frame_equal(df, df_orig)
  960. def test_series_set_axis(using_copy_on_write):
  961. # GH 49473
  962. ser = Series([1, 2, 3])
  963. ser_orig = ser.copy()
  964. ser2 = ser.set_axis(["a", "b", "c"], axis="index")
  965. if using_copy_on_write:
  966. assert np.shares_memory(ser, ser2)
  967. else:
  968. assert not np.shares_memory(ser, ser2)
  969. # mutating ser triggers a copy-on-write for the column / block
  970. ser2.iloc[0] = 0
  971. assert not np.shares_memory(ser2, ser)
  972. tm.assert_series_equal(ser, ser_orig)
  973. def test_set_flags(using_copy_on_write):
  974. ser = Series([1, 2, 3])
  975. ser_orig = ser.copy()
  976. ser2 = ser.set_flags(allows_duplicate_labels=False)
  977. assert np.shares_memory(ser, ser2)
  978. # mutating ser triggers a copy-on-write for the column / block
  979. ser2.iloc[0] = 0
  980. if using_copy_on_write:
  981. assert not np.shares_memory(ser2, ser)
  982. tm.assert_series_equal(ser, ser_orig)
  983. else:
  984. assert np.shares_memory(ser2, ser)
  985. expected = Series([0, 2, 3])
  986. tm.assert_series_equal(ser, expected)
  987. @pytest.mark.parametrize("kwargs", [{"mapper": "test"}, {"index": "test"}])
  988. def test_rename_axis(using_copy_on_write, kwargs):
  989. df = DataFrame({"a": [1, 2, 3, 4]}, index=Index([1, 2, 3, 4], name="a"))
  990. df_orig = df.copy()
  991. df2 = df.rename_axis(**kwargs)
  992. if using_copy_on_write:
  993. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  994. else:
  995. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  996. df2.iloc[0, 0] = 0
  997. if using_copy_on_write:
  998. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  999. tm.assert_frame_equal(df, df_orig)
  1000. @pytest.mark.parametrize(
  1001. "func, tz", [("tz_convert", "Europe/Berlin"), ("tz_localize", None)]
  1002. )
  1003. def test_tz_convert_localize(using_copy_on_write, func, tz):
  1004. # GH 49473
  1005. ser = Series(
  1006. [1, 2], index=date_range(start="2014-08-01 09:00", freq="H", periods=2, tz=tz)
  1007. )
  1008. ser_orig = ser.copy()
  1009. ser2 = getattr(ser, func)("US/Central")
  1010. if using_copy_on_write:
  1011. assert np.shares_memory(ser.values, ser2.values)
  1012. else:
  1013. assert not np.shares_memory(ser.values, ser2.values)
  1014. # mutating ser triggers a copy-on-write for the column / block
  1015. ser2.iloc[0] = 0
  1016. assert not np.shares_memory(ser2.values, ser.values)
  1017. tm.assert_series_equal(ser, ser_orig)
  1018. def test_droplevel(using_copy_on_write):
  1019. # GH 49473
  1020. index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
  1021. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index)
  1022. df_orig = df.copy()
  1023. df2 = df.droplevel(0)
  1024. if using_copy_on_write:
  1025. assert np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  1026. else:
  1027. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  1028. # mutating df2 triggers a copy-on-write for that column / block
  1029. df2.iloc[0, 0] = 0
  1030. assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
  1031. tm.assert_frame_equal(df, df_orig)
  1032. def test_squeeze(using_copy_on_write):
  1033. df = DataFrame({"a": [1, 2, 3]})
  1034. df_orig = df.copy()
  1035. series = df.squeeze()
  1036. # Should share memory regardless of CoW since squeeze is just an iloc
  1037. assert np.shares_memory(series.values, get_array(df, "a"))
  1038. # mutating squeezed df triggers a copy-on-write for that column/block
  1039. series.iloc[0] = 0
  1040. if using_copy_on_write:
  1041. assert not np.shares_memory(series.values, get_array(df, "a"))
  1042. tm.assert_frame_equal(df, df_orig)
  1043. else:
  1044. # Without CoW the original will be modified
  1045. assert np.shares_memory(series.values, get_array(df, "a"))
  1046. assert df.loc[0, "a"] == 0
  1047. def test_items(using_copy_on_write):
  1048. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  1049. df_orig = df.copy()
  1050. # Test this twice, since the second time, the item cache will be
  1051. # triggered, and we want to make sure it still works then.
  1052. for i in range(2):
  1053. for name, ser in df.items():
  1054. assert np.shares_memory(get_array(ser, name), get_array(df, name))
  1055. # mutating df triggers a copy-on-write for that column / block
  1056. ser.iloc[0] = 0
  1057. if using_copy_on_write:
  1058. assert not np.shares_memory(get_array(ser, name), get_array(df, name))
  1059. tm.assert_frame_equal(df, df_orig)
  1060. else:
  1061. # Original frame will be modified
  1062. assert df.loc[0, name] == 0
  1063. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1064. def test_putmask(using_copy_on_write, dtype):
  1065. df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
  1066. view = df[:]
  1067. df_orig = df.copy()
  1068. df[df == df] = 5
  1069. if using_copy_on_write:
  1070. assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1071. tm.assert_frame_equal(view, df_orig)
  1072. else:
  1073. # Without CoW the original will be modified
  1074. assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1075. assert view.iloc[0, 0] == 5
  1076. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1077. def test_putmask_no_reference(using_copy_on_write, dtype):
  1078. df = DataFrame({"a": [1, 2], "b": 1, "c": 2}, dtype=dtype)
  1079. arr_a = get_array(df, "a")
  1080. df[df == df] = 5
  1081. if using_copy_on_write:
  1082. assert np.shares_memory(arr_a, get_array(df, "a"))
  1083. @pytest.mark.parametrize("dtype", ["float64", "Float64"])
  1084. def test_putmask_aligns_rhs_no_reference(using_copy_on_write, dtype):
  1085. df = DataFrame({"a": [1.5, 2], "b": 1.5}, dtype=dtype)
  1086. arr_a = get_array(df, "a")
  1087. df[df == df] = DataFrame({"a": [5.5, 5]})
  1088. if using_copy_on_write:
  1089. assert np.shares_memory(arr_a, get_array(df, "a"))
  1090. @pytest.mark.parametrize("val, exp", [(5.5, True), (5, False)])
  1091. def test_putmask_dont_copy_some_blocks(using_copy_on_write, val, exp):
  1092. df = DataFrame({"a": [1, 2], "b": 1, "c": 1.5})
  1093. view = df[:]
  1094. df_orig = df.copy()
  1095. indexer = DataFrame(
  1096. [[True, False, False], [True, False, False]], columns=list("abc")
  1097. )
  1098. df[indexer] = val
  1099. if using_copy_on_write:
  1100. assert not np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1101. # TODO(CoW): Could split blocks to avoid copying the whole block
  1102. assert np.shares_memory(get_array(view, "b"), get_array(df, "b")) is exp
  1103. assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
  1104. assert df._mgr._has_no_reference(1) is not exp
  1105. assert not df._mgr._has_no_reference(2)
  1106. tm.assert_frame_equal(view, df_orig)
  1107. elif val == 5:
  1108. # Without CoW the original will be modified, the other case upcasts, e.g. copy
  1109. assert np.shares_memory(get_array(view, "a"), get_array(df, "a"))
  1110. assert np.shares_memory(get_array(view, "c"), get_array(df, "c"))
  1111. assert view.iloc[0, 0] == 5
  1112. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1113. def test_where_noop(using_copy_on_write, dtype):
  1114. ser = Series([1, 2, 3], dtype=dtype)
  1115. ser_orig = ser.copy()
  1116. result = ser.where(ser > 0, 10)
  1117. if using_copy_on_write:
  1118. assert np.shares_memory(get_array(ser), get_array(result))
  1119. else:
  1120. assert not np.shares_memory(get_array(ser), get_array(result))
  1121. result.iloc[0] = 10
  1122. if using_copy_on_write:
  1123. assert not np.shares_memory(get_array(ser), get_array(result))
  1124. tm.assert_series_equal(ser, ser_orig)
  1125. @pytest.mark.parametrize("dtype", ["int64", "Int64"])
  1126. def test_where(using_copy_on_write, dtype):
  1127. ser = Series([1, 2, 3], dtype=dtype)
  1128. ser_orig = ser.copy()
  1129. result = ser.where(ser < 0, 10)
  1130. assert not np.shares_memory(get_array(ser), get_array(result))
  1131. tm.assert_series_equal(ser, ser_orig)
  1132. @pytest.mark.parametrize("dtype, val", [("int64", 10.5), ("Int64", 10)])
  1133. def test_where_noop_on_single_column(using_copy_on_write, dtype, val):
  1134. df = DataFrame({"a": [1, 2, 3], "b": [-4, -5, -6]}, dtype=dtype)
  1135. df_orig = df.copy()
  1136. result = df.where(df < 0, val)
  1137. if using_copy_on_write:
  1138. assert np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1139. assert not np.shares_memory(get_array(df, "a"), get_array(result, "a"))
  1140. else:
  1141. assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1142. result.iloc[0, 1] = 10
  1143. if using_copy_on_write:
  1144. assert not np.shares_memory(get_array(df, "b"), get_array(result, "b"))
  1145. tm.assert_frame_equal(df, df_orig)
  1146. def test_asfreq_noop(using_copy_on_write):
  1147. df = DataFrame(
  1148. {"a": [0.0, None, 2.0, 3.0]},
  1149. index=date_range("1/1/2000", periods=4, freq="T"),
  1150. )
  1151. df_orig = df.copy()
  1152. df2 = df.asfreq(freq="T")
  1153. if using_copy_on_write:
  1154. assert np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1155. else:
  1156. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1157. # mutating df2 triggers a copy-on-write for that column / block
  1158. df2.iloc[0, 0] = 0
  1159. assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a"))
  1160. tm.assert_frame_equal(df, df_orig)
  1161. def test_iterrows(using_copy_on_write):
  1162. df = DataFrame({"a": 0, "b": 1}, index=[1, 2, 3])
  1163. df_orig = df.copy()
  1164. for _, sub in df.iterrows():
  1165. sub.iloc[0] = 100
  1166. if using_copy_on_write:
  1167. tm.assert_frame_equal(df, df_orig)
  1168. def test_interpolate_creates_copy(using_copy_on_write):
  1169. # GH#51126
  1170. df = DataFrame({"a": [1.5, np.nan, 3]})
  1171. view = df[:]
  1172. expected = df.copy()
  1173. df.ffill(inplace=True)
  1174. df.iloc[0, 0] = 100.5
  1175. if using_copy_on_write:
  1176. tm.assert_frame_equal(view, expected)
  1177. else:
  1178. expected = DataFrame({"a": [100.5, 1.5, 3]})
  1179. tm.assert_frame_equal(view, expected)
  1180. def test_isetitem(using_copy_on_write):
  1181. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
  1182. df_orig = df.copy()
  1183. df2 = df.copy(deep=None) # Trigger a CoW
  1184. df2.isetitem(1, np.array([-1, -2, -3])) # This is inplace
  1185. if using_copy_on_write:
  1186. assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1187. assert np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  1188. else:
  1189. assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1190. assert not np.shares_memory(get_array(df, "a"), get_array(df2, "a"))
  1191. df2.loc[0, "a"] = 0
  1192. tm.assert_frame_equal(df, df_orig) # Original is unchanged
  1193. if using_copy_on_write:
  1194. assert np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1195. else:
  1196. assert not np.shares_memory(get_array(df, "c"), get_array(df2, "c"))
  1197. @pytest.mark.parametrize(
  1198. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  1199. )
  1200. def test_isetitem_series(using_copy_on_write, dtype):
  1201. df = DataFrame({"a": [1, 2, 3], "b": np.array([4, 5, 6], dtype=dtype)})
  1202. ser = Series([7, 8, 9])
  1203. ser_orig = ser.copy()
  1204. df.isetitem(0, ser)
  1205. if using_copy_on_write:
  1206. # TODO(CoW) this can share memory
  1207. assert not np.shares_memory(get_array(df, "a"), get_array(ser))
  1208. # mutating dataframe doesn't update series
  1209. df.loc[0, "a"] = 0
  1210. tm.assert_series_equal(ser, ser_orig)
  1211. # mutating series doesn't update dataframe
  1212. df = DataFrame({"a": [1, 2, 3], "b": np.array([4, 5, 6], dtype=dtype)})
  1213. ser = Series([7, 8, 9])
  1214. df.isetitem(0, ser)
  1215. ser.loc[0] = 0
  1216. expected = DataFrame({"a": [7, 8, 9], "b": np.array([4, 5, 6], dtype=dtype)})
  1217. tm.assert_frame_equal(df, expected)
  1218. @pytest.mark.parametrize("key", ["a", ["a"]])
  1219. def test_get(using_copy_on_write, key):
  1220. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  1221. df_orig = df.copy()
  1222. result = df.get(key)
  1223. if using_copy_on_write:
  1224. assert np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  1225. result.iloc[0] = 0
  1226. assert not np.shares_memory(get_array(result, "a"), get_array(df, "a"))
  1227. tm.assert_frame_equal(df, df_orig)
  1228. else:
  1229. # for non-CoW it depends on whether we got a Series or DataFrame if it
  1230. # is a view or copy or triggers a warning or not
  1231. warn = SettingWithCopyWarning if isinstance(key, list) else None
  1232. with pd.option_context("chained_assignment", "warn"):
  1233. with tm.assert_produces_warning(warn):
  1234. result.iloc[0] = 0
  1235. if isinstance(key, list):
  1236. tm.assert_frame_equal(df, df_orig)
  1237. else:
  1238. assert df.iloc[0, 0] == 0
  1239. @pytest.mark.parametrize("axis, key", [(0, 0), (1, "a")])
  1240. @pytest.mark.parametrize(
  1241. "dtype", ["int64", "float64"], ids=["single-block", "mixed-block"]
  1242. )
  1243. def test_xs(using_copy_on_write, using_array_manager, axis, key, dtype):
  1244. single_block = (dtype == "int64") and not using_array_manager
  1245. is_view = single_block or (using_array_manager and axis == 1)
  1246. df = DataFrame(
  1247. {"a": [1, 2, 3], "b": [4, 5, 6], "c": np.array([7, 8, 9], dtype=dtype)}
  1248. )
  1249. df_orig = df.copy()
  1250. result = df.xs(key, axis=axis)
  1251. if axis == 1 or single_block:
  1252. assert np.shares_memory(get_array(df, "a"), get_array(result))
  1253. elif using_copy_on_write:
  1254. assert result._mgr._has_no_reference(0)
  1255. if using_copy_on_write or is_view:
  1256. result.iloc[0] = 0
  1257. else:
  1258. with pd.option_context("chained_assignment", "warn"):
  1259. with tm.assert_produces_warning(SettingWithCopyWarning):
  1260. result.iloc[0] = 0
  1261. if using_copy_on_write or (not single_block and axis == 0):
  1262. tm.assert_frame_equal(df, df_orig)
  1263. else:
  1264. assert df.iloc[0, 0] == 0
  1265. @pytest.mark.parametrize("axis", [0, 1])
  1266. @pytest.mark.parametrize("key, level", [("l1", 0), (2, 1)])
  1267. def test_xs_multiindex(using_copy_on_write, using_array_manager, key, level, axis):
  1268. arr = np.arange(18).reshape(6, 3)
  1269. index = MultiIndex.from_product([["l1", "l2"], [1, 2, 3]], names=["lev1", "lev2"])
  1270. df = DataFrame(arr, index=index, columns=list("abc"))
  1271. if axis == 1:
  1272. df = df.transpose().copy()
  1273. df_orig = df.copy()
  1274. result = df.xs(key, level=level, axis=axis)
  1275. if level == 0:
  1276. assert np.shares_memory(
  1277. get_array(df, df.columns[0]), get_array(result, result.columns[0])
  1278. )
  1279. warn = (
  1280. SettingWithCopyWarning
  1281. if not using_copy_on_write and not using_array_manager
  1282. else None
  1283. )
  1284. with pd.option_context("chained_assignment", "warn"):
  1285. with tm.assert_produces_warning(warn):
  1286. result.iloc[0, 0] = 0
  1287. tm.assert_frame_equal(df, df_orig)
  1288. def test_update_frame(using_copy_on_write):
  1289. df1 = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 5.0, 6.0]})
  1290. df2 = DataFrame({"b": [100.0]}, index=[1])
  1291. df1_orig = df1.copy()
  1292. view = df1[:]
  1293. df1.update(df2)
  1294. expected = DataFrame({"a": [1.0, 2.0, 3.0], "b": [4.0, 100.0, 6.0]})
  1295. tm.assert_frame_equal(df1, expected)
  1296. if using_copy_on_write:
  1297. # df1 is updated, but its view not
  1298. tm.assert_frame_equal(view, df1_orig)
  1299. assert np.shares_memory(get_array(df1, "a"), get_array(view, "a"))
  1300. assert not np.shares_memory(get_array(df1, "b"), get_array(view, "b"))
  1301. else:
  1302. tm.assert_frame_equal(view, expected)
  1303. def test_update_series(using_copy_on_write):
  1304. ser1 = Series([1.0, 2.0, 3.0])
  1305. ser2 = Series([100.0], index=[1])
  1306. ser1_orig = ser1.copy()
  1307. view = ser1[:]
  1308. ser1.update(ser2)
  1309. expected = Series([1.0, 100.0, 3.0])
  1310. tm.assert_series_equal(ser1, expected)
  1311. if using_copy_on_write:
  1312. # ser1 is updated, but its view not
  1313. tm.assert_series_equal(view, ser1_orig)
  1314. else:
  1315. tm.assert_series_equal(view, expected)
  1316. def test_inplace_arithmetic_series():
  1317. ser = Series([1, 2, 3])
  1318. data = get_array(ser)
  1319. ser *= 2
  1320. assert np.shares_memory(get_array(ser), data)
  1321. tm.assert_numpy_array_equal(data, get_array(ser))
  1322. def test_inplace_arithmetic_series_with_reference(using_copy_on_write):
  1323. ser = Series([1, 2, 3])
  1324. ser_orig = ser.copy()
  1325. view = ser[:]
  1326. ser *= 2
  1327. if using_copy_on_write:
  1328. assert not np.shares_memory(get_array(ser), get_array(view))
  1329. tm.assert_series_equal(ser_orig, view)
  1330. else:
  1331. assert np.shares_memory(get_array(ser), get_array(view))
  1332. @pytest.mark.parametrize("copy", [True, False])
  1333. def test_transpose(using_copy_on_write, copy, using_array_manager):
  1334. df = DataFrame({"a": [1, 2, 3], "b": 1})
  1335. df_orig = df.copy()
  1336. result = df.transpose(copy=copy)
  1337. if not copy and not using_array_manager or using_copy_on_write:
  1338. assert np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1339. else:
  1340. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1341. result.iloc[0, 0] = 100
  1342. if using_copy_on_write:
  1343. tm.assert_frame_equal(df, df_orig)
  1344. def test_transpose_different_dtypes(using_copy_on_write):
  1345. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  1346. df_orig = df.copy()
  1347. result = df.T
  1348. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1349. result.iloc[0, 0] = 100
  1350. if using_copy_on_write:
  1351. tm.assert_frame_equal(df, df_orig)
  1352. def test_transpose_ea_single_column(using_copy_on_write):
  1353. df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  1354. result = df.T
  1355. assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
  1356. def test_count_read_only_array():
  1357. df = DataFrame({"a": [1, 2], "b": 3})
  1358. result = df.count()
  1359. result.iloc[0] = 100
  1360. expected = Series([100, 2], index=["a", "b"])
  1361. tm.assert_series_equal(result, expected)
  1362. def test_series_view(using_copy_on_write):
  1363. ser = Series([1, 2, 3])
  1364. ser_orig = ser.copy()
  1365. ser2 = ser.view()
  1366. assert np.shares_memory(get_array(ser), get_array(ser2))
  1367. if using_copy_on_write:
  1368. assert not ser2._mgr._has_no_reference(0)
  1369. ser2.iloc[0] = 100
  1370. if using_copy_on_write:
  1371. tm.assert_series_equal(ser_orig, ser)
  1372. else:
  1373. expected = Series([100, 2, 3])
  1374. tm.assert_series_equal(ser, expected)