test_generic.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. from copy import (
  2. copy,
  3. deepcopy,
  4. )
  5. import numpy as np
  6. import pytest
  7. from pandas.core.dtypes.common import is_scalar
  8. from pandas import (
  9. DataFrame,
  10. Series,
  11. )
  12. import pandas._testing as tm
  13. # ----------------------------------------------------------------------
  14. # Generic types test cases
  15. def construct(box, shape, value=None, dtype=None, **kwargs):
  16. """
  17. construct an object for the given shape
  18. if value is specified use that if its a scalar
  19. if value is an array, repeat it as needed
  20. """
  21. if isinstance(shape, int):
  22. shape = tuple([shape] * box._AXIS_LEN)
  23. if value is not None:
  24. if is_scalar(value):
  25. if value == "empty":
  26. arr = None
  27. dtype = np.float64
  28. # remove the info axis
  29. kwargs.pop(box._info_axis_name, None)
  30. else:
  31. arr = np.empty(shape, dtype=dtype)
  32. arr.fill(value)
  33. else:
  34. fshape = np.prod(shape)
  35. arr = value.ravel()
  36. new_shape = fshape / arr.shape[0]
  37. if fshape % arr.shape[0] != 0:
  38. raise Exception("invalid value passed in construct")
  39. arr = np.repeat(arr, new_shape).reshape(shape)
  40. else:
  41. arr = np.random.randn(*shape)
  42. return box(arr, dtype=dtype, **kwargs)
  43. class TestGeneric:
  44. @pytest.mark.parametrize(
  45. "func",
  46. [
  47. str.lower,
  48. {x: x.lower() for x in list("ABCD")},
  49. Series({x: x.lower() for x in list("ABCD")}),
  50. ],
  51. )
  52. def test_rename(self, frame_or_series, func):
  53. # single axis
  54. idx = list("ABCD")
  55. for axis in frame_or_series._AXIS_ORDERS:
  56. kwargs = {axis: idx}
  57. obj = construct(frame_or_series, 4, **kwargs)
  58. # rename a single axis
  59. result = obj.rename(**{axis: func})
  60. expected = obj.copy()
  61. setattr(expected, axis, list("abcd"))
  62. tm.assert_equal(result, expected)
  63. def test_get_numeric_data(self, frame_or_series):
  64. n = 4
  65. kwargs = {
  66. frame_or_series._get_axis_name(i): list(range(n))
  67. for i in range(frame_or_series._AXIS_LEN)
  68. }
  69. # get the numeric data
  70. o = construct(frame_or_series, n, **kwargs)
  71. result = o._get_numeric_data()
  72. tm.assert_equal(result, o)
  73. # non-inclusion
  74. result = o._get_bool_data()
  75. expected = construct(frame_or_series, n, value="empty", **kwargs)
  76. if isinstance(o, DataFrame):
  77. # preserve columns dtype
  78. expected.columns = o.columns[:0]
  79. # https://github.com/pandas-dev/pandas/issues/50862
  80. tm.assert_equal(result.reset_index(drop=True), expected)
  81. # get the bool data
  82. arr = np.array([True, True, False, True])
  83. o = construct(frame_or_series, n, value=arr, **kwargs)
  84. result = o._get_numeric_data()
  85. tm.assert_equal(result, o)
  86. def test_nonzero(self, frame_or_series):
  87. # GH 4633
  88. # look at the boolean/nonzero behavior for objects
  89. obj = construct(frame_or_series, shape=4)
  90. msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
  91. with pytest.raises(ValueError, match=msg):
  92. bool(obj == 0)
  93. with pytest.raises(ValueError, match=msg):
  94. bool(obj == 1)
  95. with pytest.raises(ValueError, match=msg):
  96. bool(obj)
  97. obj = construct(frame_or_series, shape=4, value=1)
  98. with pytest.raises(ValueError, match=msg):
  99. bool(obj == 0)
  100. with pytest.raises(ValueError, match=msg):
  101. bool(obj == 1)
  102. with pytest.raises(ValueError, match=msg):
  103. bool(obj)
  104. obj = construct(frame_or_series, shape=4, value=np.nan)
  105. with pytest.raises(ValueError, match=msg):
  106. bool(obj == 0)
  107. with pytest.raises(ValueError, match=msg):
  108. bool(obj == 1)
  109. with pytest.raises(ValueError, match=msg):
  110. bool(obj)
  111. # empty
  112. obj = construct(frame_or_series, shape=0)
  113. with pytest.raises(ValueError, match=msg):
  114. bool(obj)
  115. # invalid behaviors
  116. obj1 = construct(frame_or_series, shape=4, value=1)
  117. obj2 = construct(frame_or_series, shape=4, value=1)
  118. with pytest.raises(ValueError, match=msg):
  119. if obj1:
  120. pass
  121. with pytest.raises(ValueError, match=msg):
  122. obj1 and obj2
  123. with pytest.raises(ValueError, match=msg):
  124. obj1 or obj2
  125. with pytest.raises(ValueError, match=msg):
  126. not obj1
  127. def test_frame_or_series_compound_dtypes(self, frame_or_series):
  128. # see gh-5191
  129. # Compound dtypes should raise NotImplementedError.
  130. def f(dtype):
  131. return construct(frame_or_series, shape=3, value=1, dtype=dtype)
  132. msg = (
  133. "compound dtypes are not implemented "
  134. f"in the {frame_or_series.__name__} constructor"
  135. )
  136. with pytest.raises(NotImplementedError, match=msg):
  137. f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])
  138. # these work (though results may be unexpected)
  139. f("int64")
  140. f("float64")
  141. f("M8[ns]")
  142. def test_metadata_propagation(self, frame_or_series):
  143. # check that the metadata matches up on the resulting ops
  144. o = construct(frame_or_series, shape=3)
  145. o.name = "foo"
  146. o2 = construct(frame_or_series, shape=3)
  147. o2.name = "bar"
  148. # ----------
  149. # preserving
  150. # ----------
  151. # simple ops with scalars
  152. for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
  153. result = getattr(o, op)(1)
  154. tm.assert_metadata_equivalent(o, result)
  155. # ops with like
  156. for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
  157. result = getattr(o, op)(o)
  158. tm.assert_metadata_equivalent(o, result)
  159. # simple boolean
  160. for op in ["__eq__", "__le__", "__ge__"]:
  161. v1 = getattr(o, op)(o)
  162. tm.assert_metadata_equivalent(o, v1)
  163. tm.assert_metadata_equivalent(o, v1 & v1)
  164. tm.assert_metadata_equivalent(o, v1 | v1)
  165. # combine_first
  166. result = o.combine_first(o2)
  167. tm.assert_metadata_equivalent(o, result)
  168. # ---------------------------
  169. # non-preserving (by default)
  170. # ---------------------------
  171. # add non-like
  172. result = o + o2
  173. tm.assert_metadata_equivalent(result)
  174. # simple boolean
  175. for op in ["__eq__", "__le__", "__ge__"]:
  176. # this is a name matching op
  177. v1 = getattr(o, op)(o)
  178. v2 = getattr(o, op)(o2)
  179. tm.assert_metadata_equivalent(v2)
  180. tm.assert_metadata_equivalent(v1 & v2)
  181. tm.assert_metadata_equivalent(v1 | v2)
  182. def test_size_compat(self, frame_or_series):
  183. # GH8846
  184. # size property should be defined
  185. o = construct(frame_or_series, shape=10)
  186. assert o.size == np.prod(o.shape)
  187. assert o.size == 10 ** len(o.axes)
  188. def test_split_compat(self, frame_or_series):
  189. # xref GH8846
  190. o = construct(frame_or_series, shape=10)
  191. assert len(np.array_split(o, 5)) == 5
  192. assert len(np.array_split(o, 2)) == 2
  193. # See gh-12301
  194. def test_stat_unexpected_keyword(self, frame_or_series):
  195. obj = construct(frame_or_series, 5)
  196. starwars = "Star Wars"
  197. errmsg = "unexpected keyword"
  198. with pytest.raises(TypeError, match=errmsg):
  199. obj.max(epic=starwars) # stat_function
  200. with pytest.raises(TypeError, match=errmsg):
  201. obj.var(epic=starwars) # stat_function_ddof
  202. with pytest.raises(TypeError, match=errmsg):
  203. obj.sum(epic=starwars) # cum_function
  204. with pytest.raises(TypeError, match=errmsg):
  205. obj.any(epic=starwars) # logical_function
  206. @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
  207. def test_api_compat(self, func, frame_or_series):
  208. # GH 12021
  209. # compat for __name__, __qualname__
  210. obj = construct(frame_or_series, 5)
  211. f = getattr(obj, func)
  212. assert f.__name__ == func
  213. assert f.__qualname__.endswith(func)
  214. def test_stat_non_defaults_args(self, frame_or_series):
  215. obj = construct(frame_or_series, 5)
  216. out = np.array([0])
  217. errmsg = "the 'out' parameter is not supported"
  218. with pytest.raises(ValueError, match=errmsg):
  219. obj.max(out=out) # stat_function
  220. with pytest.raises(ValueError, match=errmsg):
  221. obj.var(out=out) # stat_function_ddof
  222. with pytest.raises(ValueError, match=errmsg):
  223. obj.sum(out=out) # cum_function
  224. with pytest.raises(ValueError, match=errmsg):
  225. obj.any(out=out) # logical_function
  226. def test_truncate_out_of_bounds(self, frame_or_series):
  227. # GH11382
  228. # small
  229. shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
  230. small = construct(frame_or_series, shape, dtype="int8", value=1)
  231. tm.assert_equal(small.truncate(), small)
  232. tm.assert_equal(small.truncate(before=0, after=3e3), small)
  233. tm.assert_equal(small.truncate(before=-1, after=2e3), small)
  234. # big
  235. shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
  236. big = construct(frame_or_series, shape, dtype="int8", value=1)
  237. tm.assert_equal(big.truncate(), big)
  238. tm.assert_equal(big.truncate(before=0, after=3e6), big)
  239. tm.assert_equal(big.truncate(before=-1, after=2e6), big)
  240. @pytest.mark.parametrize(
  241. "func",
  242. [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
  243. )
  244. @pytest.mark.parametrize("shape", [0, 1, 2])
  245. def test_copy_and_deepcopy(self, frame_or_series, shape, func):
  246. # GH 15444
  247. obj = construct(frame_or_series, shape)
  248. obj_copy = func(obj)
  249. assert obj_copy is not obj
  250. tm.assert_equal(obj_copy, obj)
  251. class TestNDFrame:
  252. # tests that don't fit elsewhere
  253. @pytest.mark.parametrize(
  254. "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
  255. )
  256. def test_squeeze_series_noop(self, ser):
  257. # noop
  258. tm.assert_series_equal(ser.squeeze(), ser)
  259. def test_squeeze_frame_noop(self):
  260. # noop
  261. df = tm.makeTimeDataFrame()
  262. tm.assert_frame_equal(df.squeeze(), df)
  263. def test_squeeze_frame_reindex(self):
  264. # squeezing
  265. df = tm.makeTimeDataFrame().reindex(columns=["A"])
  266. tm.assert_series_equal(df.squeeze(), df["A"])
  267. def test_squeeze_0_len_dim(self):
  268. # don't fail with 0 length dimensions GH11229 & GH8999
  269. empty_series = Series([], name="five", dtype=np.float64)
  270. empty_frame = DataFrame([empty_series])
  271. tm.assert_series_equal(empty_series, empty_series.squeeze())
  272. tm.assert_series_equal(empty_series, empty_frame.squeeze())
  273. def test_squeeze_axis(self):
  274. # axis argument
  275. df = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
  276. assert df.shape == (1, 1)
  277. tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
  278. tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
  279. tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
  280. tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
  281. assert df.squeeze() == df.iloc[0, 0]
  282. msg = "No axis named 2 for object type DataFrame"
  283. with pytest.raises(ValueError, match=msg):
  284. df.squeeze(axis=2)
  285. msg = "No axis named x for object type DataFrame"
  286. with pytest.raises(ValueError, match=msg):
  287. df.squeeze(axis="x")
  288. def test_squeeze_axis_len_3(self):
  289. df = tm.makeTimeDataFrame(3)
  290. tm.assert_frame_equal(df.squeeze(axis=0), df)
  291. def test_numpy_squeeze(self):
  292. s = tm.makeFloatSeries()
  293. tm.assert_series_equal(np.squeeze(s), s)
  294. df = tm.makeTimeDataFrame().reindex(columns=["A"])
  295. tm.assert_series_equal(np.squeeze(df), df["A"])
  296. @pytest.mark.parametrize(
  297. "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
  298. )
  299. def test_transpose_series(self, ser):
  300. # calls implementation in pandas/core/base.py
  301. tm.assert_series_equal(ser.transpose(), ser)
  302. def test_transpose_frame(self):
  303. df = tm.makeTimeDataFrame()
  304. tm.assert_frame_equal(df.transpose().transpose(), df)
  305. def test_numpy_transpose(self, frame_or_series):
  306. obj = tm.makeTimeDataFrame()
  307. obj = tm.get_obj(obj, frame_or_series)
  308. if frame_or_series is Series:
  309. # 1D -> np.transpose is no-op
  310. tm.assert_series_equal(np.transpose(obj), obj)
  311. # round-trip preserved
  312. tm.assert_equal(np.transpose(np.transpose(obj)), obj)
  313. msg = "the 'axes' parameter is not supported"
  314. with pytest.raises(ValueError, match=msg):
  315. np.transpose(obj, axes=1)
  316. @pytest.mark.parametrize(
  317. "ser", [tm.makeFloatSeries(), tm.makeStringSeries(), tm.makeObjectSeries()]
  318. )
  319. def test_take_series(self, ser):
  320. indices = [1, 5, -2, 6, 3, -1]
  321. out = ser.take(indices)
  322. expected = Series(
  323. data=ser.values.take(indices),
  324. index=ser.index.take(indices),
  325. dtype=ser.dtype,
  326. )
  327. tm.assert_series_equal(out, expected)
  328. def test_take_frame(self):
  329. indices = [1, 5, -2, 6, 3, -1]
  330. df = tm.makeTimeDataFrame()
  331. out = df.take(indices)
  332. expected = DataFrame(
  333. data=df.values.take(indices, axis=0),
  334. index=df.index.take(indices),
  335. columns=df.columns,
  336. )
  337. tm.assert_frame_equal(out, expected)
  338. def test_take_invalid_kwargs(self, frame_or_series):
  339. indices = [-3, 2, 0, 1]
  340. obj = tm.makeTimeDataFrame()
  341. obj = tm.get_obj(obj, frame_or_series)
  342. msg = r"take\(\) got an unexpected keyword argument 'foo'"
  343. with pytest.raises(TypeError, match=msg):
  344. obj.take(indices, foo=2)
  345. msg = "the 'out' parameter is not supported"
  346. with pytest.raises(ValueError, match=msg):
  347. obj.take(indices, out=indices)
  348. msg = "the 'mode' parameter is not supported"
  349. with pytest.raises(ValueError, match=msg):
  350. obj.take(indices, mode="clip")
  351. def test_axis_classmethods(self, frame_or_series):
  352. box = frame_or_series
  353. obj = box(dtype=object)
  354. values = box._AXIS_TO_AXIS_NUMBER.keys()
  355. for v in values:
  356. assert obj._get_axis_number(v) == box._get_axis_number(v)
  357. assert obj._get_axis_name(v) == box._get_axis_name(v)
  358. assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)
  359. def test_flags_identity(self, frame_or_series):
  360. obj = Series([1, 2])
  361. if frame_or_series is DataFrame:
  362. obj = obj.to_frame()
  363. assert obj.flags is obj.flags
  364. obj2 = obj.copy()
  365. assert obj2.flags is not obj.flags