test_raises.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633
# Only tests that raise an error and have no better location should go here.
# Tests for specific groupby methods should go in their respective
# test files.
  4. import datetime
  5. import numpy as np
  6. import pytest
  7. from pandas import (
  8. Categorical,
  9. DataFrame,
  10. Grouper,
  11. Series,
  12. )
  13. from pandas.tests.groupby import get_groupby_method_args
  14. @pytest.fixture(
  15. params=[
  16. "a",
  17. ["a"],
  18. ["a", "b"],
  19. Grouper(key="a"),
  20. lambda x: x % 2,
  21. [0, 0, 0, 1, 2, 2, 2, 3, 3],
  22. np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]),
  23. dict(zip(range(9), [0, 0, 0, 1, 2, 2, 2, 3, 3])),
  24. Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
  25. [Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), Series([3, 3, 4, 4, 4, 4, 4, 3, 3])],
  26. ]
  27. )
  28. def by(request):
  29. return request.param
  30. @pytest.fixture(params=[True, False])
  31. def groupby_series(request):
  32. return request.param
  33. @pytest.mark.parametrize("how", ["method", "agg", "transform"])
  34. def test_groupby_raises_string(how, by, groupby_series, groupby_func):
  35. df = DataFrame(
  36. {
  37. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  38. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  39. "c": range(9),
  40. "d": list("xyzwtyuio"),
  41. }
  42. )
  43. args = get_groupby_method_args(groupby_func, df)
  44. gb = df.groupby(by=by)
  45. if groupby_series:
  46. gb = gb["d"]
  47. if groupby_func == "corrwith":
  48. assert not hasattr(gb, "corrwith")
  49. return
  50. klass, msg = {
  51. "all": (None, ""),
  52. "any": (None, ""),
  53. "bfill": (None, ""),
  54. "corrwith": (TypeError, "Could not convert"),
  55. "count": (None, ""),
  56. "cumcount": (None, ""),
  57. "cummax": (
  58. (NotImplementedError, TypeError),
  59. "(function|cummax) is not (implemented|supported) for (this|object) dtype",
  60. ),
  61. "cummin": (
  62. (NotImplementedError, TypeError),
  63. "(function|cummin) is not (implemented|supported) for (this|object) dtype",
  64. ),
  65. "cumprod": (
  66. (NotImplementedError, TypeError),
  67. "(function|cumprod) is not (implemented|supported) for (this|object) dtype",
  68. ),
  69. "cumsum": (
  70. (NotImplementedError, TypeError),
  71. "(function|cumsum) is not (implemented|supported) for (this|object) dtype",
  72. ),
  73. "diff": (TypeError, "unsupported operand type"),
  74. "ffill": (None, ""),
  75. "fillna": (None, ""),
  76. "first": (None, ""),
  77. "idxmax": (TypeError, "'argmax' not allowed for this dtype"),
  78. "idxmin": (TypeError, "'argmin' not allowed for this dtype"),
  79. "last": (None, ""),
  80. "max": (None, ""),
  81. "mean": (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
  82. "median": (TypeError, "could not convert string to float"),
  83. "min": (None, ""),
  84. "ngroup": (None, ""),
  85. "nunique": (None, ""),
  86. "pct_change": (TypeError, "unsupported operand type"),
  87. "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"),
  88. "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
  89. "rank": (None, ""),
  90. "sem": (ValueError, "could not convert string to float"),
  91. "shift": (None, ""),
  92. "size": (None, ""),
  93. "skew": (TypeError, "could not convert string to float"),
  94. "std": (ValueError, "could not convert string to float"),
  95. "sum": (None, ""),
  96. "var": (TypeError, "could not convert string to float"),
  97. }[groupby_func]
  98. if klass is None:
  99. if how == "method":
  100. getattr(gb, groupby_func)(*args)
  101. elif how == "agg":
  102. gb.agg(groupby_func, *args)
  103. else:
  104. gb.transform(groupby_func, *args)
  105. else:
  106. with pytest.raises(klass, match=msg):
  107. if how == "method":
  108. getattr(gb, groupby_func)(*args)
  109. elif how == "agg":
  110. gb.agg(groupby_func, *args)
  111. else:
  112. gb.transform(groupby_func, *args)
  113. @pytest.mark.parametrize("how", ["agg", "transform"])
  114. def test_groupby_raises_string_udf(how, by, groupby_series):
  115. df = DataFrame(
  116. {
  117. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  118. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  119. "c": range(9),
  120. "d": list("xyzwtyuio"),
  121. }
  122. )
  123. gb = df.groupby(by=by)
  124. if groupby_series:
  125. gb = gb["d"]
  126. def func(x):
  127. raise TypeError("Test error message")
  128. with pytest.raises(TypeError, match="Test error message"):
  129. getattr(gb, how)(func)
  130. @pytest.mark.parametrize("how", ["agg", "transform"])
  131. @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
  132. def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np):
  133. # GH#50749
  134. df = DataFrame(
  135. {
  136. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  137. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  138. "c": range(9),
  139. "d": list("xyzwtyuio"),
  140. }
  141. )
  142. gb = df.groupby(by=by)
  143. if groupby_series:
  144. gb = gb["d"]
  145. klass, msg = {
  146. np.sum: (None, ""),
  147. np.mean: (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
  148. }[groupby_func_np]
  149. if klass is None:
  150. getattr(gb, how)(groupby_func_np)
  151. else:
  152. with pytest.raises(klass, match=msg):
  153. getattr(gb, how)(groupby_func_np)
  154. @pytest.mark.parametrize("how", ["method", "agg", "transform"])
  155. def test_groupby_raises_datetime(how, by, groupby_series, groupby_func):
  156. df = DataFrame(
  157. {
  158. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  159. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  160. "c": range(9),
  161. "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
  162. }
  163. )
  164. args = get_groupby_method_args(groupby_func, df)
  165. gb = df.groupby(by=by)
  166. if groupby_series:
  167. gb = gb["d"]
  168. if groupby_func == "corrwith":
  169. assert not hasattr(gb, "corrwith")
  170. return
  171. klass, msg = {
  172. "all": (None, ""),
  173. "any": (None, ""),
  174. "bfill": (None, ""),
  175. "corrwith": (TypeError, "cannot perform __mul__ with this index type"),
  176. "count": (None, ""),
  177. "cumcount": (None, ""),
  178. "cummax": (None, ""),
  179. "cummin": (None, ""),
  180. "cumprod": (TypeError, "datetime64 type does not support cumprod operations"),
  181. "cumsum": (TypeError, "datetime64 type does not support cumsum operations"),
  182. "diff": (None, ""),
  183. "ffill": (None, ""),
  184. "fillna": (None, ""),
  185. "first": (None, ""),
  186. "idxmax": (None, ""),
  187. "idxmin": (None, ""),
  188. "last": (None, ""),
  189. "max": (None, ""),
  190. "mean": (None, ""),
  191. "median": (None, ""),
  192. "min": (None, ""),
  193. "ngroup": (None, ""),
  194. "nunique": (None, ""),
  195. "pct_change": (TypeError, "cannot perform __truediv__ with this index type"),
  196. "prod": (TypeError, "datetime64 type does not support prod"),
  197. "quantile": (None, ""),
  198. "rank": (None, ""),
  199. "sem": (None, ""),
  200. "shift": (None, ""),
  201. "size": (None, ""),
  202. "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"),
  203. "std": (None, ""),
  204. "sum": (TypeError, "datetime64 type does not support sum operations"),
  205. "var": (None, ""),
  206. }[groupby_func]
  207. if klass is None:
  208. if how == "method":
  209. getattr(gb, groupby_func)(*args)
  210. elif how == "agg":
  211. gb.agg(groupby_func, *args)
  212. else:
  213. gb.transform(groupby_func, *args)
  214. else:
  215. with pytest.raises(klass, match=msg):
  216. if how == "method":
  217. getattr(gb, groupby_func)(*args)
  218. elif how == "agg":
  219. gb.agg(groupby_func, *args)
  220. else:
  221. gb.transform(groupby_func, *args)
  222. @pytest.mark.parametrize("how", ["agg", "transform"])
  223. def test_groupby_raises_datetime_udf(how, by, groupby_series):
  224. df = DataFrame(
  225. {
  226. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  227. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  228. "c": range(9),
  229. "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
  230. }
  231. )
  232. gb = df.groupby(by=by)
  233. if groupby_series:
  234. gb = gb["d"]
  235. def func(x):
  236. raise TypeError("Test error message")
  237. with pytest.raises(TypeError, match="Test error message"):
  238. getattr(gb, how)(func)
  239. @pytest.mark.parametrize("how", ["agg", "transform"])
  240. @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
  241. def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np):
  242. # GH#50749
  243. df = DataFrame(
  244. {
  245. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  246. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  247. "c": range(9),
  248. "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
  249. }
  250. )
  251. gb = df.groupby(by=by)
  252. if groupby_series:
  253. gb = gb["d"]
  254. klass, msg = {
  255. np.sum: (TypeError, "datetime64 type does not support sum operations"),
  256. np.mean: (None, ""),
  257. }[groupby_func_np]
  258. if klass is None:
  259. getattr(gb, how)(groupby_func_np)
  260. else:
  261. with pytest.raises(klass, match=msg):
  262. getattr(gb, how)(groupby_func_np)
  263. @pytest.mark.parametrize("how", ["method", "agg", "transform"])
  264. def test_groupby_raises_category(
  265. how, by, groupby_series, groupby_func, using_copy_on_write
  266. ):
  267. # GH#50749
  268. df = DataFrame(
  269. {
  270. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  271. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  272. "c": range(9),
  273. "d": Categorical(
  274. ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
  275. categories=["a", "b", "c", "d"],
  276. ordered=True,
  277. ),
  278. }
  279. )
  280. args = get_groupby_method_args(groupby_func, df)
  281. gb = df.groupby(by=by)
  282. if groupby_series:
  283. gb = gb["d"]
  284. if groupby_func == "corrwith":
  285. assert not hasattr(gb, "corrwith")
  286. return
  287. klass, msg = {
  288. "all": (None, ""),
  289. "any": (None, ""),
  290. "bfill": (None, ""),
  291. "corrwith": (
  292. TypeError,
  293. r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
  294. ),
  295. "count": (None, ""),
  296. "cumcount": (None, ""),
  297. "cummax": (
  298. (NotImplementedError, TypeError),
  299. "(category type does not support cummax operations|"
  300. + "category dtype not supported|"
  301. + "cummax is not supported for category dtype)",
  302. ),
  303. "cummin": (
  304. (NotImplementedError, TypeError),
  305. "(category type does not support cummin operations|"
  306. + "category dtype not supported|"
  307. "cummin is not supported for category dtype)",
  308. ),
  309. "cumprod": (
  310. (NotImplementedError, TypeError),
  311. "(category type does not support cumprod operations|"
  312. + "category dtype not supported|"
  313. "cumprod is not supported for category dtype)",
  314. ),
  315. "cumsum": (
  316. (NotImplementedError, TypeError),
  317. "(category type does not support cumsum operations|"
  318. + "category dtype not supported|"
  319. "cumsum is not supported for category dtype)",
  320. ),
  321. "diff": (
  322. TypeError,
  323. r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
  324. ),
  325. "ffill": (None, ""),
  326. "fillna": (
  327. TypeError,
  328. r"Cannot setitem on a Categorical with a new category \(0\), "
  329. + "set the categories first",
  330. )
  331. if not using_copy_on_write
  332. else (None, ""), # no-op with CoW
  333. "first": (None, ""),
  334. "idxmax": (None, ""),
  335. "idxmin": (None, ""),
  336. "last": (None, ""),
  337. "max": (None, ""),
  338. "mean": (
  339. TypeError,
  340. "'Categorical' with dtype category does not support reduction 'mean'",
  341. ),
  342. "median": (
  343. TypeError,
  344. "'Categorical' with dtype category does not support reduction 'median'",
  345. ),
  346. "min": (None, ""),
  347. "ngroup": (None, ""),
  348. "nunique": (None, ""),
  349. "pct_change": (
  350. TypeError,
  351. r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'",
  352. ),
  353. "prod": (TypeError, "category type does not support prod operations"),
  354. "quantile": (TypeError, "No matching signature found"),
  355. "rank": (None, ""),
  356. "sem": (ValueError, "Cannot cast object dtype to float64"),
  357. "shift": (None, ""),
  358. "size": (None, ""),
  359. "skew": (
  360. TypeError,
  361. "'Categorical' with dtype category does not support reduction 'skew'",
  362. ),
  363. "std": (ValueError, "Cannot cast object dtype to float64"),
  364. "sum": (TypeError, "category type does not support sum operations"),
  365. "var": (
  366. TypeError,
  367. "'Categorical' with dtype category does not support reduction 'var'",
  368. ),
  369. }[groupby_func]
  370. if klass is None:
  371. if how == "method":
  372. getattr(gb, groupby_func)(*args)
  373. elif how == "agg":
  374. gb.agg(groupby_func, *args)
  375. else:
  376. gb.transform(groupby_func, *args)
  377. else:
  378. with pytest.raises(klass, match=msg):
  379. if how == "method":
  380. getattr(gb, groupby_func)(*args)
  381. elif how == "agg":
  382. gb.agg(groupby_func, *args)
  383. else:
  384. gb.transform(groupby_func, *args)
  385. @pytest.mark.parametrize("how", ["agg", "transform"])
  386. def test_groupby_raises_category_udf(how, by, groupby_series):
  387. # GH#50749
  388. df = DataFrame(
  389. {
  390. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  391. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  392. "c": range(9),
  393. "d": Categorical(
  394. ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
  395. categories=["a", "b", "c", "d"],
  396. ordered=True,
  397. ),
  398. }
  399. )
  400. gb = df.groupby(by=by)
  401. if groupby_series:
  402. gb = gb["d"]
  403. def func(x):
  404. raise TypeError("Test error message")
  405. with pytest.raises(TypeError, match="Test error message"):
  406. getattr(gb, how)(func)
  407. @pytest.mark.parametrize("how", ["agg", "transform"])
  408. @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
  409. def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np):
  410. # GH#50749
  411. df = DataFrame(
  412. {
  413. "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
  414. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  415. "c": range(9),
  416. "d": Categorical(
  417. ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
  418. categories=["a", "b", "c", "d"],
  419. ordered=True,
  420. ),
  421. }
  422. )
  423. gb = df.groupby(by=by)
  424. if groupby_series:
  425. gb = gb["d"]
  426. klass, msg = {
  427. np.sum: (TypeError, "category type does not support sum operations"),
  428. np.mean: (
  429. TypeError,
  430. "'Categorical' with dtype category does not support reduction 'mean'",
  431. ),
  432. }[groupby_func_np]
  433. if klass is None:
  434. getattr(gb, how)(groupby_func_np)
  435. else:
  436. with pytest.raises(klass, match=msg):
  437. getattr(gb, how)(groupby_func_np)
  438. @pytest.mark.parametrize("how", ["method", "agg", "transform"])
  439. def test_groupby_raises_category_on_category(
  440. how, by, groupby_series, groupby_func, observed, using_copy_on_write
  441. ):
  442. # GH#50749
  443. df = DataFrame(
  444. {
  445. "a": Categorical(
  446. ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
  447. categories=["a", "b", "c", "d"],
  448. ordered=True,
  449. ),
  450. "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
  451. "c": range(9),
  452. "d": Categorical(
  453. ["a", "a", "a", "a", "b", "b", "c", "c", "c"],
  454. categories=["a", "b", "c", "d"],
  455. ordered=True,
  456. ),
  457. }
  458. )
  459. args = get_groupby_method_args(groupby_func, df)
  460. gb = df.groupby(by=by, observed=observed)
  461. if groupby_series:
  462. gb = gb["d"]
  463. if groupby_func == "corrwith":
  464. assert not hasattr(gb, "corrwith")
  465. return
  466. empty_groups = any(group.empty for group in gb.groups.values())
  467. klass, msg = {
  468. "all": (None, ""),
  469. "any": (None, ""),
  470. "bfill": (None, ""),
  471. "corrwith": (
  472. TypeError,
  473. r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
  474. ),
  475. "count": (None, ""),
  476. "cumcount": (None, ""),
  477. "cummax": (
  478. (NotImplementedError, TypeError),
  479. "(cummax is not supported for category dtype|"
  480. + "category dtype not supported|"
  481. + "category type does not support cummax operations)",
  482. ),
  483. "cummin": (
  484. (NotImplementedError, TypeError),
  485. "(cummin is not supported for category dtype|"
  486. + "category dtype not supported|"
  487. "category type does not support cummin operations)",
  488. ),
  489. "cumprod": (
  490. (NotImplementedError, TypeError),
  491. "(cumprod is not supported for category dtype|"
  492. + "category dtype not supported|"
  493. "category type does not support cumprod operations)",
  494. ),
  495. "cumsum": (
  496. (NotImplementedError, TypeError),
  497. "(cumsum is not supported for category dtype|"
  498. + "category dtype not supported|"
  499. + "category type does not support cumsum operations)",
  500. ),
  501. "diff": (TypeError, "unsupported operand type"),
  502. "ffill": (None, ""),
  503. "fillna": (
  504. TypeError,
  505. r"Cannot setitem on a Categorical with a new category \(0\), "
  506. + "set the categories first",
  507. )
  508. if not using_copy_on_write
  509. else (None, ""), # no-op with CoW
  510. "first": (None, ""),
  511. "idxmax": (ValueError, "attempt to get argmax of an empty sequence")
  512. if empty_groups
  513. else (None, ""),
  514. "idxmin": (ValueError, "attempt to get argmin of an empty sequence")
  515. if empty_groups
  516. else (None, ""),
  517. "last": (None, ""),
  518. "max": (None, ""),
  519. "mean": (
  520. TypeError,
  521. "'Categorical' with dtype category does not support reduction 'mean'",
  522. ),
  523. "median": (
  524. TypeError,
  525. "'Categorical' with dtype category does not support reduction 'median'",
  526. ),
  527. "min": (None, ""),
  528. "ngroup": (None, ""),
  529. "nunique": (None, ""),
  530. "pct_change": (TypeError, "unsupported operand type"),
  531. "prod": (TypeError, "category type does not support prod operations"),
  532. "quantile": (TypeError, ""),
  533. "rank": (None, ""),
  534. "sem": (ValueError, "Cannot cast object dtype to float64"),
  535. "shift": (None, ""),
  536. "size": (None, ""),
  537. "skew": (
  538. TypeError,
  539. "'Categorical' with dtype category does not support reduction 'skew'",
  540. ),
  541. "std": (ValueError, "Cannot cast object dtype to float64"),
  542. "sum": (TypeError, "category type does not support sum operations"),
  543. "var": (
  544. TypeError,
  545. "'Categorical' with dtype category does not support reduction 'var'",
  546. ),
  547. }[groupby_func]
  548. if klass is None:
  549. if how == "method":
  550. getattr(gb, groupby_func)(*args)
  551. elif how == "agg":
  552. gb.agg(groupby_func, *args)
  553. else:
  554. gb.transform(groupby_func, *args)
  555. else:
  556. with pytest.raises(klass, match=msg):
  557. if how == "method":
  558. getattr(gb, groupby_func)(*args)
  559. elif how == "agg":
  560. gb.agg(groupby_func, *args)
  561. else:
  562. gb.transform(groupby_func, *args)
  563. def test_subsetting_columns_axis_1_raises():
  564. # GH 35443
  565. df = DataFrame({"a": [1], "b": [2], "c": [3]})
  566. gb = df.groupby("a", axis=1)
  567. with pytest.raises(ValueError, match="Cannot subset columns when using axis=1"):
  568. gb["b"]