test_construction.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. import numpy as np
  2. import pytest
  3. import pandas as pd
  4. import pandas._testing as tm
  5. from pandas.arrays import BooleanArray
  6. from pandas.core.arrays.boolean import coerce_to_array
  7. def test_boolean_array_constructor():
  8. values = np.array([True, False, True, False], dtype="bool")
  9. mask = np.array([False, False, False, True], dtype="bool")
  10. result = BooleanArray(values, mask)
  11. expected = pd.array([True, False, True, None], dtype="boolean")
  12. tm.assert_extension_array_equal(result, expected)
  13. with pytest.raises(TypeError, match="values should be boolean numpy array"):
  14. BooleanArray(values.tolist(), mask)
  15. with pytest.raises(TypeError, match="mask should be boolean numpy array"):
  16. BooleanArray(values, mask.tolist())
  17. with pytest.raises(TypeError, match="values should be boolean numpy array"):
  18. BooleanArray(values.astype(int), mask)
  19. with pytest.raises(TypeError, match="mask should be boolean numpy array"):
  20. BooleanArray(values, None)
  21. with pytest.raises(ValueError, match="values.shape must match mask.shape"):
  22. BooleanArray(values.reshape(1, -1), mask)
  23. with pytest.raises(ValueError, match="values.shape must match mask.shape"):
  24. BooleanArray(values, mask.reshape(1, -1))
  25. def test_boolean_array_constructor_copy():
  26. values = np.array([True, False, True, False], dtype="bool")
  27. mask = np.array([False, False, False, True], dtype="bool")
  28. result = BooleanArray(values, mask)
  29. assert result._data is values
  30. assert result._mask is mask
  31. result = BooleanArray(values, mask, copy=True)
  32. assert result._data is not values
  33. assert result._mask is not mask
  34. def test_to_boolean_array():
  35. expected = BooleanArray(
  36. np.array([True, False, True]), np.array([False, False, False])
  37. )
  38. result = pd.array([True, False, True], dtype="boolean")
  39. tm.assert_extension_array_equal(result, expected)
  40. result = pd.array(np.array([True, False, True]), dtype="boolean")
  41. tm.assert_extension_array_equal(result, expected)
  42. result = pd.array(np.array([True, False, True], dtype=object), dtype="boolean")
  43. tm.assert_extension_array_equal(result, expected)
  44. # with missing values
  45. expected = BooleanArray(
  46. np.array([True, False, True]), np.array([False, False, True])
  47. )
  48. result = pd.array([True, False, None], dtype="boolean")
  49. tm.assert_extension_array_equal(result, expected)
  50. result = pd.array(np.array([True, False, None], dtype=object), dtype="boolean")
  51. tm.assert_extension_array_equal(result, expected)
  52. def test_to_boolean_array_all_none():
  53. expected = BooleanArray(np.array([True, True, True]), np.array([True, True, True]))
  54. result = pd.array([None, None, None], dtype="boolean")
  55. tm.assert_extension_array_equal(result, expected)
  56. result = pd.array(np.array([None, None, None], dtype=object), dtype="boolean")
  57. tm.assert_extension_array_equal(result, expected)
  58. @pytest.mark.parametrize(
  59. "a, b",
  60. [
  61. ([True, False, None, np.nan, pd.NA], [True, False, None, None, None]),
  62. ([True, np.nan], [True, None]),
  63. ([True, pd.NA], [True, None]),
  64. ([np.nan, np.nan], [None, None]),
  65. (np.array([np.nan, np.nan], dtype=float), [None, None]),
  66. ],
  67. )
  68. def test_to_boolean_array_missing_indicators(a, b):
  69. result = pd.array(a, dtype="boolean")
  70. expected = pd.array(b, dtype="boolean")
  71. tm.assert_extension_array_equal(result, expected)
  72. @pytest.mark.parametrize(
  73. "values",
  74. [
  75. ["foo", "bar"],
  76. ["1", "2"],
  77. # "foo",
  78. [1, 2],
  79. [1.0, 2.0],
  80. pd.date_range("20130101", periods=2),
  81. np.array(["foo"]),
  82. np.array([1, 2]),
  83. np.array([1.0, 2.0]),
  84. [np.nan, {"a": 1}],
  85. ],
  86. )
  87. def test_to_boolean_array_error(values):
  88. # error in converting existing arrays to BooleanArray
  89. msg = "Need to pass bool-like value"
  90. with pytest.raises(TypeError, match=msg):
  91. pd.array(values, dtype="boolean")
  92. def test_to_boolean_array_from_integer_array():
  93. result = pd.array(np.array([1, 0, 1, 0]), dtype="boolean")
  94. expected = pd.array([True, False, True, False], dtype="boolean")
  95. tm.assert_extension_array_equal(result, expected)
  96. # with missing values
  97. result = pd.array(np.array([1, 0, 1, None]), dtype="boolean")
  98. expected = pd.array([True, False, True, None], dtype="boolean")
  99. tm.assert_extension_array_equal(result, expected)
  100. def test_to_boolean_array_from_float_array():
  101. result = pd.array(np.array([1.0, 0.0, 1.0, 0.0]), dtype="boolean")
  102. expected = pd.array([True, False, True, False], dtype="boolean")
  103. tm.assert_extension_array_equal(result, expected)
  104. # with missing values
  105. result = pd.array(np.array([1.0, 0.0, 1.0, np.nan]), dtype="boolean")
  106. expected = pd.array([True, False, True, None], dtype="boolean")
  107. tm.assert_extension_array_equal(result, expected)
  108. def test_to_boolean_array_integer_like():
  109. # integers of 0's and 1's
  110. result = pd.array([1, 0, 1, 0], dtype="boolean")
  111. expected = pd.array([True, False, True, False], dtype="boolean")
  112. tm.assert_extension_array_equal(result, expected)
  113. # with missing values
  114. result = pd.array([1, 0, 1, None], dtype="boolean")
  115. expected = pd.array([True, False, True, None], dtype="boolean")
  116. tm.assert_extension_array_equal(result, expected)
  117. def test_coerce_to_array():
  118. # TODO this is currently not public API
  119. values = np.array([True, False, True, False], dtype="bool")
  120. mask = np.array([False, False, False, True], dtype="bool")
  121. result = BooleanArray(*coerce_to_array(values, mask=mask))
  122. expected = BooleanArray(values, mask)
  123. tm.assert_extension_array_equal(result, expected)
  124. assert result._data is values
  125. assert result._mask is mask
  126. result = BooleanArray(*coerce_to_array(values, mask=mask, copy=True))
  127. expected = BooleanArray(values, mask)
  128. tm.assert_extension_array_equal(result, expected)
  129. assert result._data is not values
  130. assert result._mask is not mask
  131. # mixed missing from values and mask
  132. values = [True, False, None, False]
  133. mask = np.array([False, False, False, True], dtype="bool")
  134. result = BooleanArray(*coerce_to_array(values, mask=mask))
  135. expected = BooleanArray(
  136. np.array([True, False, True, True]), np.array([False, False, True, True])
  137. )
  138. tm.assert_extension_array_equal(result, expected)
  139. result = BooleanArray(*coerce_to_array(np.array(values, dtype=object), mask=mask))
  140. tm.assert_extension_array_equal(result, expected)
  141. result = BooleanArray(*coerce_to_array(values, mask=mask.tolist()))
  142. tm.assert_extension_array_equal(result, expected)
  143. # raise errors for wrong dimension
  144. values = np.array([True, False, True, False], dtype="bool")
  145. mask = np.array([False, False, False, True], dtype="bool")
  146. # passing 2D values is OK as long as no mask
  147. coerce_to_array(values.reshape(1, -1))
  148. with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
  149. coerce_to_array(values.reshape(1, -1), mask=mask)
  150. with pytest.raises(ValueError, match="values.shape and mask.shape must match"):
  151. coerce_to_array(values, mask=mask.reshape(1, -1))
  152. def test_coerce_to_array_from_boolean_array():
  153. # passing BooleanArray to coerce_to_array
  154. values = np.array([True, False, True, False], dtype="bool")
  155. mask = np.array([False, False, False, True], dtype="bool")
  156. arr = BooleanArray(values, mask)
  157. result = BooleanArray(*coerce_to_array(arr))
  158. tm.assert_extension_array_equal(result, arr)
  159. # no copy
  160. assert result._data is arr._data
  161. assert result._mask is arr._mask
  162. result = BooleanArray(*coerce_to_array(arr), copy=True)
  163. tm.assert_extension_array_equal(result, arr)
  164. assert result._data is not arr._data
  165. assert result._mask is not arr._mask
  166. with pytest.raises(ValueError, match="cannot pass mask for BooleanArray input"):
  167. coerce_to_array(arr, mask=mask)
  168. def test_coerce_to_numpy_array():
  169. # with missing values -> object dtype
  170. arr = pd.array([True, False, None], dtype="boolean")
  171. result = np.array(arr)
  172. expected = np.array([True, False, pd.NA], dtype="object")
  173. tm.assert_numpy_array_equal(result, expected)
  174. # also with no missing values -> object dtype
  175. arr = pd.array([True, False, True], dtype="boolean")
  176. result = np.array(arr)
  177. expected = np.array([True, False, True], dtype="object")
  178. tm.assert_numpy_array_equal(result, expected)
  179. # force bool dtype
  180. result = np.array(arr, dtype="bool")
  181. expected = np.array([True, False, True], dtype="bool")
  182. tm.assert_numpy_array_equal(result, expected)
  183. # with missing values will raise error
  184. arr = pd.array([True, False, None], dtype="boolean")
  185. msg = (
  186. "cannot convert to 'bool'-dtype NumPy array with missing values. "
  187. "Specify an appropriate 'na_value' for this dtype."
  188. )
  189. with pytest.raises(ValueError, match=msg):
  190. np.array(arr, dtype="bool")
  191. def test_to_boolean_array_from_strings():
  192. result = BooleanArray._from_sequence_of_strings(
  193. np.array(["True", "False", "1", "1.0", "0", "0.0", np.nan], dtype=object)
  194. )
  195. expected = BooleanArray(
  196. np.array([True, False, True, True, False, False, False]),
  197. np.array([False, False, False, False, False, False, True]),
  198. )
  199. tm.assert_extension_array_equal(result, expected)
  200. def test_to_boolean_array_from_strings_invalid_string():
  201. with pytest.raises(ValueError, match="cannot be cast"):
  202. BooleanArray._from_sequence_of_strings(["donkey"])
  203. @pytest.mark.parametrize("box", [True, False], ids=["series", "array"])
  204. def test_to_numpy(box):
  205. con = pd.Series if box else pd.array
  206. # default (with or without missing values) -> object dtype
  207. arr = con([True, False, True], dtype="boolean")
  208. result = arr.to_numpy()
  209. expected = np.array([True, False, True], dtype="object")
  210. tm.assert_numpy_array_equal(result, expected)
  211. arr = con([True, False, None], dtype="boolean")
  212. result = arr.to_numpy()
  213. expected = np.array([True, False, pd.NA], dtype="object")
  214. tm.assert_numpy_array_equal(result, expected)
  215. arr = con([True, False, None], dtype="boolean")
  216. result = arr.to_numpy(dtype="str")
  217. expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
  218. tm.assert_numpy_array_equal(result, expected)
  219. # no missing values -> can convert to bool, otherwise raises
  220. arr = con([True, False, True], dtype="boolean")
  221. result = arr.to_numpy(dtype="bool")
  222. expected = np.array([True, False, True], dtype="bool")
  223. tm.assert_numpy_array_equal(result, expected)
  224. arr = con([True, False, None], dtype="boolean")
  225. with pytest.raises(ValueError, match="cannot convert to 'bool'-dtype"):
  226. result = arr.to_numpy(dtype="bool")
  227. # specify dtype and na_value
  228. arr = con([True, False, None], dtype="boolean")
  229. result = arr.to_numpy(dtype=object, na_value=None)
  230. expected = np.array([True, False, None], dtype="object")
  231. tm.assert_numpy_array_equal(result, expected)
  232. result = arr.to_numpy(dtype=bool, na_value=False)
  233. expected = np.array([True, False, False], dtype="bool")
  234. tm.assert_numpy_array_equal(result, expected)
  235. result = arr.to_numpy(dtype="int64", na_value=-99)
  236. expected = np.array([1, 0, -99], dtype="int64")
  237. tm.assert_numpy_array_equal(result, expected)
  238. result = arr.to_numpy(dtype="float64", na_value=np.nan)
  239. expected = np.array([1, 0, np.nan], dtype="float64")
  240. tm.assert_numpy_array_equal(result, expected)
  241. # converting to int or float without specifying na_value raises
  242. with pytest.raises(ValueError, match="cannot convert to 'int64'-dtype"):
  243. arr.to_numpy(dtype="int64")
  244. with pytest.raises(ValueError, match="cannot convert to 'float64'-dtype"):
  245. arr.to_numpy(dtype="float64")
  246. def test_to_numpy_copy():
  247. # to_numpy can be zero-copy if no missing values
  248. arr = pd.array([True, False, True], dtype="boolean")
  249. result = arr.to_numpy(dtype=bool)
  250. result[0] = False
  251. tm.assert_extension_array_equal(
  252. arr, pd.array([False, False, True], dtype="boolean")
  253. )
  254. arr = pd.array([True, False, True], dtype="boolean")
  255. result = arr.to_numpy(dtype=bool, copy=True)
  256. result[0] = False
  257. tm.assert_extension_array_equal(arr, pd.array([True, False, True], dtype="boolean"))