dim2.py 11 KB


  """
  Tests for 2D compatibility.
  """
  4. import numpy as np
  5. import pytest
  6. from pandas._libs.missing import is_matching_na
  7. from pandas.core.dtypes.common import (
  8. is_bool_dtype,
  9. is_integer_dtype,
  10. )
  11. import pandas as pd
  12. from pandas.core.arrays.integer import INT_STR_TO_DTYPE
  13. from pandas.tests.extension.base.base import BaseExtensionTests
  class Dim2CompatTests(BaseExtensionTests):
      """
      2D-compatibility test suite for ExtensionArray subclasses.

      The ``data`` and ``data_missing`` arguments are presumably pytest
      fixtures provided by the module that subclasses this suite.
      """

      # Note: these are ONLY for ExtensionArray subclasses that support 2D arrays.
      #  i.e. not for pyarrow-backed EAs.

      def test_transpose(self, data):
          """``.T`` of a non-square 2D array has the reversed shape."""
          two_wide = data.repeat(2).reshape(-1, 2)
          dims = two_wide.shape
          # otherwise the rest of the test is useless
          assert dims[0] != dims[-1]

          assert two_wide.T.shape == dims[::-1]

      def test_frame_from_2d_array(self, data):
          """A DataFrame built from a 2D array equals one built column by column."""
          two_wide = data.repeat(2).reshape(-1, 2)

          from_2d = pd.DataFrame(two_wide)
          by_column = pd.DataFrame({0: two_wide[:, 0], 1: two_wide[:, 1]})
          self.assert_frame_equal(from_2d, by_column)

      def test_swapaxes(self, data):
          """Swapping axes 0 and 1 gives the same array as ``.T``."""
          two_wide = data.repeat(2).reshape(-1, 2)

          swapped = two_wide.swapaxes(0, 1)
          self.assert_extension_array_equal(swapped, two_wide.T)

      def test_delete_2d(self, data):
          """``delete`` works along either axis of a 2D array."""
          three_wide = data.repeat(3).reshape(-1, 3)

          # axis = 0
          row_dropped = three_wide.delete(1, axis=0)
          want = data.delete(1).repeat(3).reshape(-1, 3)
          self.assert_extension_array_equal(row_dropped, want)

          # axis = 1
          col_dropped = three_wide.delete(1, axis=1)
          want = data.repeat(2).reshape(-1, 2)
          self.assert_extension_array_equal(col_dropped, want)

      def test_take_2d(self, data):
          """``take`` along axis 0 of a one-column array mirrors the 1D ``take``."""
          column = data.reshape(-1, 1)

          taken = column.take([0, 0, -1], axis=0)
          want = data.take([0, 0, -1]).reshape(-1, 1)
          self.assert_extension_array_equal(taken, want)

      def test_repr_2d(self, data):
          """The class-name header shows up exactly once in a 2D repr."""
          # this could fail in a corner case where an element contained the name
          header = f"<{type(data).__name__}"
          for dims in [(1, -1), (-1, 1)]:
              rendered = repr(data.reshape(*dims))
              assert rendered.count(header) == 1

      def test_reshape(self, data):
          """``reshape`` accepts dimensions as separate args or as one tuple."""
          # the same expectations hold for both calling conventions
          for reshaped in [data.reshape(-1, 1), data.reshape((-1, 1))]:
              assert reshaped.shape == (data.size, 1)
              assert len(reshaped) == len(data)

          # shapes asking for twice as many elements must be rejected
          with pytest.raises(ValueError):
              data.reshape((data.size, 2))
          with pytest.raises(ValueError):
              data.reshape(data.size, 2)

      def test_getitem_2d(self, data):
          """Row, slice and column indexing on a single-row 2D array."""
          row2d = data.reshape(1, -1)

          self.assert_extension_array_equal(row2d[0], data)

          # there is only one row, so positions 1 and -2 are out of bounds
          for bad_row in (1, -2):
              with pytest.raises(IndexError):
                  row2d[bad_row]

          self.assert_extension_array_equal(row2d[:], row2d)
          self.assert_extension_array_equal(row2d[:, :], row2d)

          first_col = row2d[:, 0]
          self.assert_extension_array_equal(first_col, data[[0]])

          # dimension-expanding getitem on 1D
          expanded = data[:, np.newaxis]
          self.assert_extension_array_equal(expanded, row2d.T)

      def test_iter_2d(self, data):
          """Iterating a 2D array yields 1D arrays of the same type and dtype."""
          row2d = data.reshape(1, -1)

          rows = list(iter(row2d))
          assert len(rows) == row2d.shape[0]

          for row in rows:
              assert isinstance(row, type(data))
              assert row.dtype == data.dtype
              assert row.ndim == 1
              assert len(row) == row2d.shape[1]

      def test_tolist_2d(self, data):
          """``tolist`` on a 2D array returns a list of lists."""
          nested = data.reshape(1, -1).tolist()
          want = [data.tolist()]

          assert isinstance(nested, list)
          assert all(isinstance(inner, list) for inner in nested)
          assert nested == want

      def test_concat_2d(self, data):
          """``_concat_same_type`` honours ``axis`` for 2D inputs."""
          lhs = type(data)._concat_same_type([data, data]).reshape(-1, 2)
          rhs = lhs.copy()

          # axis=0
          stacked = lhs._concat_same_type([lhs, rhs], axis=0)
          want = data._concat_same_type([data] * 4).reshape(-1, 2)
          self.assert_extension_array_equal(stacked, want)

          # axis=1
          widened = lhs._concat_same_type([lhs, rhs], axis=1)
          assert widened.shape == (len(data), 4)
          self.assert_extension_array_equal(widened[:, :2], lhs)
          self.assert_extension_array_equal(widened[:, 2:], rhs)

          # axis > 1 -> invalid
          msg = "axis 2 is out of bounds for array of dimension 2"
          with pytest.raises(ValueError, match=msg):
              lhs._concat_same_type([lhs, rhs], axis=2)

      @pytest.mark.parametrize("method", ["backfill", "pad"])
      def test_fillna_2d_method(self, data_missing, method):
          """Method-based ``fillna`` on 2D matches filling 1D and then reshaping."""
          square = data_missing.repeat(2).reshape(2, 2)

          # sanity check on the layout: first row all-NA, second row NA-free
          top = square[0]
          assert top.isna().all()
          bottom = square[1]
          assert not bottom.isna().any()

          filled = square.fillna(method=method)
          want = data_missing.fillna(method=method).repeat(2).reshape(2, 2)
          self.assert_extension_array_equal(filled, want)

      @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
      def test_reductions_2d_axis_none(self, data, method):
          """With ``axis=None`` the 2D reduction agrees with the 1D reduction."""
          row2d = data.reshape(1, -1)

          raised_1d = None
          raised_2d = None
          try:
              expected = getattr(data, method)()
          except Exception as exc:
              # if the 1D reduction is invalid, the 2D reduction should be as well
              raised_1d = exc
              try:
                  result = getattr(row2d, method)(axis=None)
              except Exception as exc2:
                  raised_2d = exc2
          else:
              result = getattr(row2d, method)(axis=None)

          if not (raised_2d is None and raised_1d is None):
              # both calls must have failed, and with the same exception class
              assert type(raised_2d) == type(raised_1d)
              return

          assert is_matching_na(result, expected) or result == expected

      @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
      def test_reductions_2d_axis0(self, data, method):
          """
          Reduce a single-row 2D array along axis 0 and compare against ``data``.

          ``std``/``var`` are compared against ``data - data``; integer and bool
          inputs are compared after the dtype conversions spelled out below.
          """
          row2d = data.reshape(1, -1)

          # pass ddof=0 so we get all-zero std instead of all-NA std
          kwargs = {"ddof": 0} if method in ["std", "var"] else {}

          try:
              result = getattr(row2d, method)(axis=0, **kwargs)
          except Exception as raised_2d:
              try:
                  getattr(data, method)()
              except Exception as raised_1d:
                  assert type(raised_2d) == type(raised_1d)
                  return
              else:
                  raise AssertionError("Both reductions should raise or neither")

          def reduced_dtype(dtype):
              # windows and 32bit builds will in some cases have int32/uint32
              #  where other builds will have int64/uint64.
              if dtype.itemsize == 8:
                  return dtype
              # kinds "i"/"b" map to the default signed int; what is left
              #  is dtype.kind == "u"
              np_default = int if dtype.kind in "ib" else np.uint
              return INT_STR_TO_DTYPE[np.dtype(np_default).name]

          if method in ["median", "sum", "prod"]:
              # std and var are not dtype-preserving
              want = data
              if method in ["sum", "prod"] and data.dtype.kind in "iub":
                  target = reduced_dtype(data.dtype)
                  want = data.astype(target)

                  # For bool input we get IntegerArray instead of BooleanArray,
                  #  so the array type is only checked for the other kinds.
                  if data.dtype.kind != "b":
                      assert type(want) == type(data), type(want)
                  assert target == want.dtype

              self.assert_extension_array_equal(result, want)
          elif method in ["mean", "std", "var"]:
              baseline = data
              if is_integer_dtype(data) or is_bool_dtype(data):
                  baseline = data.astype("Float64")

              want = baseline if method == "mean" else baseline - baseline
              self.assert_extension_array_equal(result, want)

      @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"])
      def test_reductions_2d_axis1(self, data, method):
          """Reducing a single-row 2D array along axis 1 gives the 1D scalar."""
          row2d = data.reshape(1, -1)

          try:
              result = getattr(row2d, method)(axis=1)
          except Exception as raised_2d:
              try:
                  getattr(data, method)()
              except Exception as raised_1d:
                  assert type(raised_2d) == type(raised_1d)
                  return
              else:
                  raise AssertionError("Both reductions should raise or neither")

          # not necessarily type/dtype-preserving, so weaker assertions
          assert result.shape == (1,)
          scalar_1d = getattr(data, method)()
          scalar_2d = result[0]
          assert is_matching_na(scalar_2d, scalar_1d) or scalar_2d == scalar_1d
  class NDArrayBacked2DTests(Dim2CompatTests):
      """More specific tests for NDArrayBackedExtensionArray subclasses."""

      def test_copy_order(self, data):
          """Check the memory layout produced by ``copy`` for each ``order``."""
          # We should be matching numpy semantics for the "order" keyword in 'copy'

          def c_contig(arr):
              # C-contiguity flag of the backing ndarray
              return arr._ndarray.flags["C_CONTIGUOUS"]

          def f_contig(arr):
              # F-contiguity flag of the backing ndarray
              return arr._ndarray.flags["F_CONTIGUOUS"]

          base = data.repeat(2).reshape(-1, 2)
          assert c_contig(base)

          assert c_contig(base.copy())
          assert c_contig(base[::2, ::2].copy())

          fortran = base.copy("F")
          assert not c_contig(fortran)
          assert f_contig(fortran)

          assert c_contig(base.copy("K"))

          # "K" on the transpose keeps the transposed (F) layout
          keep_t = base.T.copy("K")
          assert not c_contig(keep_t)
          assert f_contig(keep_t)

          # order not accepted by numpy
          msg = r"order must be one of 'C', 'F', 'A', or 'K' \(got 'Q'\)"
          with pytest.raises(ValueError, match=msg):
              base.copy("Q")

          # neither contiguity
          strided = base[::2]
          assert not c_contig(strided)
          assert not f_contig(strided)

          # every assertion below inspects a fresh copy of the strided view
          assert c_contig(strided.copy())
          assert not f_contig(strided.copy())

          assert c_contig(strided.copy("C"))
          assert not f_contig(strided.copy("C"))

          assert not c_contig(strided.copy("F"))
          assert f_contig(strided.copy("F"))

          assert c_contig(strided.copy("K"))
          assert not f_contig(strided.copy("K"))