test_array.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. DataFrame,
  5. Series,
  6. date_range,
  7. )
  8. import pandas._testing as tm
  9. from pandas.tests.copy_view.util import get_array
  10. # -----------------------------------------------------------------------------
  11. # Copy/view behaviour for accessing underlying array of Series/DataFrame
  12. @pytest.mark.parametrize(
  13. "method",
  14. [lambda ser: ser.values, lambda ser: np.asarray(ser)],
  15. ids=["values", "asarray"],
  16. )
  17. def test_series_values(using_copy_on_write, method):
  18. ser = Series([1, 2, 3], name="name")
  19. ser_orig = ser.copy()
  20. arr = method(ser)
  21. if using_copy_on_write:
  22. # .values still gives a view but is read-only
  23. assert np.shares_memory(arr, get_array(ser, "name"))
  24. assert arr.flags.writeable is False
  25. # mutating series through arr therefore doesn't work
  26. with pytest.raises(ValueError, match="read-only"):
  27. arr[0] = 0
  28. tm.assert_series_equal(ser, ser_orig)
  29. # mutating the series itself still works
  30. ser.iloc[0] = 0
  31. assert ser.values[0] == 0
  32. else:
  33. assert arr.flags.writeable is True
  34. arr[0] = 0
  35. assert ser.iloc[0] == 0
  36. @pytest.mark.parametrize(
  37. "method",
  38. [lambda df: df.values, lambda df: np.asarray(df)],
  39. ids=["values", "asarray"],
  40. )
  41. def test_dataframe_values(using_copy_on_write, using_array_manager, method):
  42. df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
  43. df_orig = df.copy()
  44. arr = method(df)
  45. if using_copy_on_write:
  46. # .values still gives a view but is read-only
  47. assert np.shares_memory(arr, get_array(df, "a"))
  48. assert arr.flags.writeable is False
  49. # mutating series through arr therefore doesn't work
  50. with pytest.raises(ValueError, match="read-only"):
  51. arr[0, 0] = 0
  52. tm.assert_frame_equal(df, df_orig)
  53. # mutating the series itself still works
  54. df.iloc[0, 0] = 0
  55. assert df.values[0, 0] == 0
  56. else:
  57. assert arr.flags.writeable is True
  58. arr[0, 0] = 0
  59. if not using_array_manager:
  60. assert df.iloc[0, 0] == 0
  61. else:
  62. tm.assert_frame_equal(df, df_orig)
  63. def test_series_to_numpy(using_copy_on_write):
  64. ser = Series([1, 2, 3], name="name")
  65. ser_orig = ser.copy()
  66. # default: copy=False, no dtype or NAs
  67. arr = ser.to_numpy()
  68. if using_copy_on_write:
  69. # to_numpy still gives a view but is read-only
  70. assert np.shares_memory(arr, get_array(ser, "name"))
  71. assert arr.flags.writeable is False
  72. # mutating series through arr therefore doesn't work
  73. with pytest.raises(ValueError, match="read-only"):
  74. arr[0] = 0
  75. tm.assert_series_equal(ser, ser_orig)
  76. # mutating the series itself still works
  77. ser.iloc[0] = 0
  78. assert ser.values[0] == 0
  79. else:
  80. assert arr.flags.writeable is True
  81. arr[0] = 0
  82. assert ser.iloc[0] == 0
  83. # specify copy=False gives a writeable array
  84. ser = Series([1, 2, 3], name="name")
  85. arr = ser.to_numpy(copy=True)
  86. assert not np.shares_memory(arr, get_array(ser, "name"))
  87. assert arr.flags.writeable is True
  88. # specifying a dtype that already causes a copy also gives a writeable array
  89. ser = Series([1, 2, 3], name="name")
  90. arr = ser.to_numpy(dtype="float64")
  91. assert not np.shares_memory(arr, get_array(ser, "name"))
  92. assert arr.flags.writeable is True
  93. @pytest.mark.parametrize("order", ["F", "C"])
  94. def test_ravel_read_only(using_copy_on_write, order):
  95. ser = Series([1, 2, 3])
  96. arr = ser.ravel(order=order)
  97. if using_copy_on_write:
  98. assert arr.flags.writeable is False
  99. assert np.shares_memory(get_array(ser), arr)
  100. def test_series_array_ea_dtypes(using_copy_on_write):
  101. ser = Series([1, 2, 3], dtype="Int64")
  102. arr = np.asarray(ser, dtype="int64")
  103. assert np.shares_memory(arr, get_array(ser))
  104. if using_copy_on_write:
  105. assert arr.flags.writeable is False
  106. else:
  107. assert arr.flags.writeable is True
  108. arr = np.asarray(ser)
  109. assert not np.shares_memory(arr, get_array(ser))
  110. assert arr.flags.writeable is True
  111. def test_dataframe_array_ea_dtypes(using_copy_on_write):
  112. df = DataFrame({"a": [1, 2, 3]}, dtype="Int64")
  113. arr = np.asarray(df, dtype="int64")
  114. # TODO: This should be able to share memory, but we are roundtripping
  115. # through object
  116. assert not np.shares_memory(arr, get_array(df, "a"))
  117. assert arr.flags.writeable is True
  118. arr = np.asarray(df)
  119. if using_copy_on_write:
  120. # TODO(CoW): This should be True
  121. assert arr.flags.writeable is False
  122. else:
  123. assert arr.flags.writeable is True
  124. def test_dataframe_array_string_dtype(using_copy_on_write, using_array_manager):
  125. df = DataFrame({"a": ["a", "b"]}, dtype="string")
  126. arr = np.asarray(df)
  127. if not using_array_manager:
  128. assert np.shares_memory(arr, get_array(df, "a"))
  129. if using_copy_on_write:
  130. assert arr.flags.writeable is False
  131. else:
  132. assert arr.flags.writeable is True
  133. def test_dataframe_multiple_numpy_dtypes():
  134. df = DataFrame({"a": [1, 2, 3], "b": 1.5})
  135. arr = np.asarray(df)
  136. assert not np.shares_memory(arr, get_array(df, "a"))
  137. assert arr.flags.writeable is True
  138. def test_values_is_ea(using_copy_on_write):
  139. df = DataFrame({"a": date_range("2012-01-01", periods=3)})
  140. arr = np.asarray(df)
  141. if using_copy_on_write:
  142. assert arr.flags.writeable is False
  143. else:
  144. assert arr.flags.writeable is True
  145. def test_empty_dataframe():
  146. df = DataFrame()
  147. arr = np.asarray(df)
  148. assert arr.flags.writeable is True