test_construction.py 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.core.arrays import FloatingArray
  6. from pandas.core.arrays.floating import (
  7. Float32Dtype,
  8. Float64Dtype,
  9. )
  10. def test_uses_pandas_na():
  11. a = pd.array([1, None], dtype=Float64Dtype())
  12. assert a[1] is pd.NA
  13. def test_floating_array_constructor():
  14. values = np.array([1, 2, 3, 4], dtype="float64")
  15. mask = np.array([False, False, False, True], dtype="bool")
  16. result = FloatingArray(values, mask)
  17. expected = pd.array([1, 2, 3, np.nan], dtype="Float64")
  18. tm.assert_extension_array_equal(result, expected)
  19. tm.assert_numpy_array_equal(result._data, values)
  20. tm.assert_numpy_array_equal(result._mask, mask)
  21. msg = r".* should be .* numpy array. Use the 'pd.array' function instead"
  22. with pytest.raises(TypeError, match=msg):
  23. FloatingArray(values.tolist(), mask)
  24. with pytest.raises(TypeError, match=msg):
  25. FloatingArray(values, mask.tolist())
  26. with pytest.raises(TypeError, match=msg):
  27. FloatingArray(values.astype(int), mask)
  28. msg = r"__init__\(\) missing 1 required positional argument: 'mask'"
  29. with pytest.raises(TypeError, match=msg):
  30. FloatingArray(values)
  31. def test_floating_array_disallows_float16():
  32. # GH#44715
  33. arr = np.array([1, 2], dtype=np.float16)
  34. mask = np.array([False, False])
  35. msg = "FloatingArray does not support np.float16 dtype"
  36. with pytest.raises(TypeError, match=msg):
  37. FloatingArray(arr, mask)
  38. def test_floating_array_disallows_Float16_dtype(request):
  39. # GH#44715
  40. with pytest.raises(TypeError, match="data type 'Float16' not understood"):
  41. pd.array([1.0, 2.0], dtype="Float16")
  42. def test_floating_array_constructor_copy():
  43. values = np.array([1, 2, 3, 4], dtype="float64")
  44. mask = np.array([False, False, False, True], dtype="bool")
  45. result = FloatingArray(values, mask)
  46. assert result._data is values
  47. assert result._mask is mask
  48. result = FloatingArray(values, mask, copy=True)
  49. assert result._data is not values
  50. assert result._mask is not mask
  51. def test_to_array():
  52. result = pd.array([0.1, 0.2, 0.3, 0.4])
  53. expected = pd.array([0.1, 0.2, 0.3, 0.4], dtype="Float64")
  54. tm.assert_extension_array_equal(result, expected)
  55. @pytest.mark.parametrize(
  56. "a, b",
  57. [
  58. ([1, None], [1, pd.NA]),
  59. ([None], [pd.NA]),
  60. ([None, np.nan], [pd.NA, pd.NA]),
  61. ([1, np.nan], [1, pd.NA]),
  62. ([np.nan], [pd.NA]),
  63. ],
  64. )
  65. def test_to_array_none_is_nan(a, b):
  66. result = pd.array(a, dtype="Float64")
  67. expected = pd.array(b, dtype="Float64")
  68. tm.assert_extension_array_equal(result, expected)
  69. def test_to_array_mixed_integer_float():
  70. result = pd.array([1, 2.0])
  71. expected = pd.array([1.0, 2.0], dtype="Float64")
  72. tm.assert_extension_array_equal(result, expected)
  73. result = pd.array([1, None, 2.0])
  74. expected = pd.array([1.0, None, 2.0], dtype="Float64")
  75. tm.assert_extension_array_equal(result, expected)
  76. @pytest.mark.parametrize(
  77. "values",
  78. [
  79. ["foo", "bar"],
  80. "foo",
  81. 1,
  82. 1.0,
  83. pd.date_range("20130101", periods=2),
  84. np.array(["foo"]),
  85. [[1, 2], [3, 4]],
  86. [np.nan, {"a": 1}],
  87. # GH#44514 all-NA case used to get quietly swapped out before checking ndim
  88. np.array([pd.NA] * 6, dtype=object).reshape(3, 2),
  89. ],
  90. )
  91. def test_to_array_error(values):
  92. # error in converting existing arrays to FloatingArray
  93. msg = "|".join(
  94. [
  95. "cannot be converted to FloatingDtype",
  96. "values must be a 1D list-like",
  97. "Cannot pass scalar",
  98. r"float\(\) argument must be a string or a (real )?number, not 'dict'",
  99. "could not convert string to float: 'foo'",
  100. ]
  101. )
  102. with pytest.raises((TypeError, ValueError), match=msg):
  103. pd.array(values, dtype="Float64")
  104. @pytest.mark.parametrize("values", [["1", "2", None], ["1.5", "2", None]])
  105. def test_construct_from_float_strings(values):
  106. # see also test_to_integer_array_str
  107. expected = pd.array([float(values[0]), 2, None], dtype="Float64")
  108. res = pd.array(values, dtype="Float64")
  109. tm.assert_extension_array_equal(res, expected)
  110. res = FloatingArray._from_sequence(values)
  111. tm.assert_extension_array_equal(res, expected)
  112. def test_to_array_inferred_dtype():
  113. # if values has dtype -> respect it
  114. result = pd.array(np.array([1, 2], dtype="float32"))
  115. assert result.dtype == Float32Dtype()
  116. # if values have no dtype -> always float64
  117. result = pd.array([1.0, 2.0])
  118. assert result.dtype == Float64Dtype()
  119. def test_to_array_dtype_keyword():
  120. result = pd.array([1, 2], dtype="Float32")
  121. assert result.dtype == Float32Dtype()
  122. # if values has dtype -> override it
  123. result = pd.array(np.array([1, 2], dtype="float32"), dtype="Float64")
  124. assert result.dtype == Float64Dtype()
  125. def test_to_array_integer():
  126. result = pd.array([1, 2], dtype="Float64")
  127. expected = pd.array([1.0, 2.0], dtype="Float64")
  128. tm.assert_extension_array_equal(result, expected)
  129. # for integer dtypes, the itemsize is not preserved
  130. # TODO can we specify "floating" in general?
  131. result = pd.array(np.array([1, 2], dtype="int32"), dtype="Float64")
  132. assert result.dtype == Float64Dtype()
  133. @pytest.mark.parametrize(
  134. "bool_values, values, target_dtype, expected_dtype",
  135. [
  136. ([False, True], [0, 1], Float64Dtype(), Float64Dtype()),
  137. ([False, True], [0, 1], "Float64", Float64Dtype()),
  138. ([False, True, np.nan], [0, 1, np.nan], Float64Dtype(), Float64Dtype()),
  139. ],
  140. )
  141. def test_to_array_bool(bool_values, values, target_dtype, expected_dtype):
  142. result = pd.array(bool_values, dtype=target_dtype)
  143. assert result.dtype == expected_dtype
  144. expected = pd.array(values, dtype=target_dtype)
  145. tm.assert_extension_array_equal(result, expected)
  146. def test_series_from_float(data):
  147. # construct from our dtype & string dtype
  148. dtype = data.dtype
  149. # from float
  150. expected = pd.Series(data)
  151. result = pd.Series(data.to_numpy(na_value=np.nan, dtype="float"), dtype=str(dtype))
  152. tm.assert_series_equal(result, expected)
  153. # from list
  154. expected = pd.Series(data)
  155. result = pd.Series(np.array(data).tolist(), dtype=str(dtype))
  156. tm.assert_series_equal(result, expected)