test_setitem.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091
  1. import numpy as np
  2. from pandas import (
  3. DataFrame,
  4. Index,
  5. RangeIndex,
  6. Series,
  7. )
  8. import pandas._testing as tm
  9. # -----------------------------------------------------------------------------
  10. # Copy/view behaviour for the values that are set in a DataFrame
  11. def test_set_column_with_array():
  12. # Case: setting an array as a new column (df[col] = arr) copies that data
  13. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  14. arr = np.array([1, 2, 3], dtype="int64")
  15. df["c"] = arr
  16. # the array data is copied
  17. assert not np.shares_memory(df["c"].values, arr)
  18. # and thus modifying the array does not modify the DataFrame
  19. arr[0] = 0
  20. tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
  21. def test_set_column_with_series(using_copy_on_write):
  22. # Case: setting a series as a new column (df[col] = s) copies that data
  23. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  24. ser = Series([1, 2, 3])
  25. df["c"] = ser
  26. if using_copy_on_write:
  27. # TODO(CoW) with CoW we can delay the copy
  28. # assert np.shares_memory(df["c"].values, ser.values)
  29. assert not np.shares_memory(df["c"].values, ser.values)
  30. else:
  31. # the series data is copied
  32. assert not np.shares_memory(df["c"].values, ser.values)
  33. # and modifying the series does not modify the DataFrame
  34. ser.iloc[0] = 0
  35. assert ser.iloc[0] == 0
  36. tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
  37. def test_set_column_with_index(using_copy_on_write):
  38. # Case: setting an index as a new column (df[col] = idx) copies that data
  39. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  40. idx = Index([1, 2, 3])
  41. df["c"] = idx
  42. # the index data is copied
  43. assert not np.shares_memory(df["c"].values, idx.values)
  44. # and thus modifying the index does not modify the DataFrame
  45. idx.values[0] = 0
  46. tm.assert_series_equal(df["c"], Series([1, 2, 3], name="c"))
  47. idx = RangeIndex(1, 4)
  48. arr = idx.values
  49. df["d"] = idx
  50. assert not np.shares_memory(df["d"].values, arr)
  51. arr[0] = 0
  52. tm.assert_series_equal(df["d"], Series([1, 2, 3], name="d"))
  53. def test_set_columns_with_dataframe(using_copy_on_write):
  54. # Case: setting a DataFrame as new columns copies that data
  55. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  56. df2 = DataFrame({"c": [7, 8, 9], "d": [10, 11, 12]})
  57. df[["c", "d"]] = df2
  58. if using_copy_on_write:
  59. # TODO(CoW) with CoW we can delay the copy
  60. # assert np.shares_memory(df["c"].values, df2["c"].values)
  61. assert not np.shares_memory(df["c"].values, df2["c"].values)
  62. else:
  63. # the data is copied
  64. assert not np.shares_memory(df["c"].values, df2["c"].values)
  65. # and modifying the set DataFrame does not modify the original DataFrame
  66. df2.iloc[0, 0] = 0
  67. tm.assert_series_equal(df["c"], Series([7, 8, 9], name="c"))