# test_to_numpy.py (4.9 KB)

  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.core.arrays import FloatingArray
  6. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  7. def test_to_numpy(box):
  8. con = pd.Series if box else pd.array
  9. # default (with or without missing values) -> object dtype
  10. arr = con([0.1, 0.2, 0.3], dtype="Float64")
  11. result = arr.to_numpy()
  12. expected = np.array([0.1, 0.2, 0.3], dtype="object")
  13. tm.assert_numpy_array_equal(result, expected)
  14. arr = con([0.1, 0.2, None], dtype="Float64")
  15. result = arr.to_numpy()
  16. expected = np.array([0.1, 0.2, pd.NA], dtype="object")
  17. tm.assert_numpy_array_equal(result, expected)
  18. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  19. def test_to_numpy_float(box):
  20. con = pd.Series if box else pd.array
  21. # no missing values -> can convert to float, otherwise raises
  22. arr = con([0.1, 0.2, 0.3], dtype="Float64")
  23. result = arr.to_numpy(dtype="float64")
  24. expected = np.array([0.1, 0.2, 0.3], dtype="float64")
  25. tm.assert_numpy_array_equal(result, expected)
  26. arr = con([0.1, 0.2, None], dtype="Float64")
  27. with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
  28. result = arr.to_numpy(dtype="float64")
  29. # need to explicitly specify na_value
  30. result = arr.to_numpy(dtype="float64", na_value=np.nan)
  31. expected = np.array([0.1, 0.2, np.nan], dtype="float64")
  32. tm.assert_numpy_array_equal(result, expected)
  33. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  34. def test_to_numpy_int(box):
  35. con = pd.Series if box else pd.array
  36. # no missing values -> can convert to int, otherwise raises
  37. arr = con([1.0, 2.0, 3.0], dtype="Float64")
  38. result = arr.to_numpy(dtype="int64")
  39. expected = np.array([1, 2, 3], dtype="int64")
  40. tm.assert_numpy_array_equal(result, expected)
  41. arr = con([1.0, 2.0, None], dtype="Float64")
  42. with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
  43. result = arr.to_numpy(dtype="int64")
  44. # automatic casting (floors the values)
  45. arr = con([0.1, 0.9, 1.1], dtype="Float64")
  46. result = arr.to_numpy(dtype="int64")
  47. expected = np.array([0, 0, 1], dtype="int64")
  48. tm.assert_numpy_array_equal(result, expected)
  49. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  50. def test_to_numpy_na_value(box):
  51. con = pd.Series if box else pd.array
  52. arr = con([0.0, 1.0, None], dtype="Float64")
  53. result = arr.to_numpy(dtype=object, na_value=None)
  54. expected = np.array([0.0, 1.0, None], dtype="object")
  55. tm.assert_numpy_array_equal(result, expected)
  56. result = arr.to_numpy(dtype=bool, na_value=False)
  57. expected = np.array([False, True, False], dtype="bool")
  58. tm.assert_numpy_array_equal(result, expected)
  59. result = arr.to_numpy(dtype="int64", na_value=-99)
  60. expected = np.array([0, 1, -99], dtype="int64")
  61. tm.assert_numpy_array_equal(result, expected)
  62. def test_to_numpy_na_value_with_nan():
  63. # array with both NaN and NA -> only fill NA with `na_value`
  64. arr = FloatingArray(np.array([0.0, np.nan, 0.0]), np.array([False, False, True]))
  65. result = arr.to_numpy(dtype="float64", na_value=-1)
  66. expected = np.array([0.0, np.nan, -1.0], dtype="float64")
  67. tm.assert_numpy_array_equal(result, expected)
  68. @pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
  69. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  70. def test_to_numpy_dtype(box, dtype):
  71. con = pd.Series if box else pd.array
  72. arr = con([0.0, 1.0], dtype="Float64")
  73. result = arr.to_numpy(dtype=dtype)
  74. expected = np.array([0, 1], dtype=dtype)
  75. tm.assert_numpy_array_equal(result, expected)
  76. @pytest.mark.parametrize("dtype", ["float64", "float32", "int32", "int64", "bool"])
  77. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  78. def test_to_numpy_na_raises(box, dtype):
  79. con = pd.Series if box else pd.array
  80. arr = con([0.0, 1.0, None], dtype="Float64")
  81. with pytest.raises(ValueError, match=dtype):
  82. arr.to_numpy(dtype=dtype)
  83. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  84. def test_to_numpy_string(box, dtype):
  85. con = pd.Series if box else pd.array
  86. arr = con([0.0, 1.0, None], dtype="Float64")
  87. result = arr.to_numpy(dtype="str")
  88. expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
  89. tm.assert_numpy_array_equal(result, expected)
  90. def test_to_numpy_copy():
  91. # to_numpy can be zero-copy if no missing values
  92. arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
  93. result = arr.to_numpy(dtype="float64")
  94. result[0] = 10
  95. tm.assert_extension_array_equal(arr, pd.array([10, 0.2, 0.3], dtype="Float64"))
  96. arr = pd.array([0.1, 0.2, 0.3], dtype="Float64")
  97. result = arr.to_numpy(dtype="float64", copy=True)
  98. result[0] = 10
  99. tm.assert_extension_array_equal(arr, pd.array([0.1, 0.2, 0.3], dtype="Float64"))