# Only tests that raise an error and have no better location should go here.
# Tests for specific groupby methods should go in their respective
# test files.
- import datetime
- import numpy as np
- import pytest
- from pandas import (
- Categorical,
- DataFrame,
- Grouper,
- Series,
- )
- from pandas.tests.groupby import get_groupby_method_args
@pytest.fixture(
    params=[
        # column label(s)
        "a",
        ["a"],
        ["a", "b"],
        # Grouper object keyed on column "a"
        Grouper(key="a"),
        # callable key
        lambda x: x % 2,
        # nine-element list / ndarray / dict keys
        [0, 0, 0, 1, 2, 2, 2, 3, 3],
        np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]),
        dict(enumerate([0, 0, 0, 1, 2, 2, 2, 3, 3])),
        # a single Series and a list of Series
        Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
        [Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), Series([3, 3, 4, 4, 4, 4, 4, 3, 3])],
    ]
)
def by(request):
    """Yield each grouping key that the tests pass as ``by`` to ``groupby``."""
    grouping_key = request.param
    return grouping_key
@pytest.fixture(params=[True, False])
def groupby_series(request):
    """Boolean flag; when True the tests select column ``"d"`` from the groupby."""
    select_series = request.param
    return select_series
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_string(how, by, groupby_series, groupby_func):
    """
    Run each groupby function on a frame whose column "d" holds strings.

    The function is invoked directly, through ``agg`` or through
    ``transform`` (per ``how``) and must either succeed or raise the
    exception/message recorded in the expectation table below.
    """
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": list("xyzwtyuio"),
        }
    )
    args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby(by=by)

    if groupby_series:
        grouped = grouped["d"]

        if groupby_func == "corrwith":
            # nothing to call in this case: only check the attribute is absent
            assert not hasattr(grouped, "corrwith")
            return

    # groupby function name -> (expected exception type(s) or None, match regex)
    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (TypeError, "Could not convert"),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(function|cummax) is not (implemented|supported) for (this|object) dtype",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(function|cummin) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(function|cumprod) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(function|cumsum) is not (implemented|supported) for (this|object) dtype",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": (None, ""),
        "fillna": (None, ""),
        "first": (None, ""),
        "idxmax": (TypeError, "'argmax' not allowed for this dtype"),
        "idxmin": (TypeError, "'argmin' not allowed for this dtype"),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
        "median": (TypeError, "could not convert string to float"),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"),
        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
        "rank": (None, ""),
        "sem": (ValueError, "could not convert string to float"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (TypeError, "could not convert string to float"),
        "std": (ValueError, "could not convert string to float"),
        "sum": (None, ""),
        "var": (TypeError, "could not convert string to float"),
    }
    exc_type, pattern = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg, or transform.
        if how == "method":
            return getattr(grouped, groupby_func)(*args)
        if how == "agg":
            return grouped.agg(groupby_func, *args)
        return grouped.transform(groupby_func, *args)

    if exc_type is None:
        # No error expected: the call just has to complete.
        run_op()
        return

    with pytest.raises(exc_type, match=pattern):
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_string_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF surfaces from ``agg``/``transform``."""
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": list("xyzwtyuio"),
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    def func(x):
        # Fails unconditionally so the test can look for this exact message.
        raise TypeError("Test error message")

    apply_udf = getattr(grouped, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np):
    """Pass ``np.sum``/``np.mean`` to ``agg``/``transform`` on string data."""
    # GH#50749
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": list("xyzwtyuio"),
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    # numpy function -> (expected exception type or None, match regex)
    expectations = {
        np.sum: (None, ""),
        np.mean: (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"),
    }
    exc_type, pattern = expectations[groupby_func_np]

    if exc_type is None:
        getattr(grouped, how)(groupby_func_np)
        return

    with pytest.raises(exc_type, match=pattern):
        getattr(grouped, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_datetime(how, by, groupby_series, groupby_func):
    """
    Run each groupby function on a frame whose column "d" holds a datetime.

    The function is invoked directly, through ``agg`` or through
    ``transform`` (per ``how``) and must either succeed or raise the
    exception/message recorded in the expectation table below.
    """
    stamp = datetime.datetime(2005, 1, 1, 10, 30, 23, 540000)
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": stamp,
        }
    )
    args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby(by=by)

    if groupby_series:
        grouped = grouped["d"]

        if groupby_func == "corrwith":
            # nothing to call in this case: only check the attribute is absent
            assert not hasattr(grouped, "corrwith")
            return

    # groupby function name -> (expected exception type or None, match regex)
    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (TypeError, "cannot perform __mul__ with this index type"),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (None, ""),
        "cummin": (None, ""),
        "cumprod": (TypeError, "datetime64 type does not support cumprod operations"),
        "cumsum": (TypeError, "datetime64 type does not support cumsum operations"),
        "diff": (None, ""),
        "ffill": (None, ""),
        "fillna": (None, ""),
        "first": (None, ""),
        "idxmax": (None, ""),
        "idxmin": (None, ""),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (None, ""),
        "median": (None, ""),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (TypeError, "cannot perform __truediv__ with this index type"),
        "prod": (TypeError, "datetime64 type does not support prod"),
        "quantile": (None, ""),
        "rank": (None, ""),
        "sem": (None, ""),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"),
        "std": (None, ""),
        "sum": (TypeError, "datetime64 type does not support sum operations"),
        "var": (None, ""),
    }
    exc_type, pattern = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg, or transform.
        if how == "method":
            return getattr(grouped, groupby_func)(*args)
        if how == "agg":
            return grouped.agg(groupby_func, *args)
        return grouped.transform(groupby_func, *args)

    if exc_type is None:
        # No error expected: the call just has to complete.
        run_op()
        return

    with pytest.raises(exc_type, match=pattern):
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_datetime_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF surfaces from ``agg``/``transform``."""
    stamp = datetime.datetime(2005, 1, 1, 10, 30, 23, 540000)
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": stamp,
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    def func(x):
        # Fails unconditionally so the test can look for this exact message.
        raise TypeError("Test error message")

    apply_udf = getattr(grouped, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np):
    """Pass ``np.sum``/``np.mean`` to ``agg``/``transform`` on datetime data."""
    # GH#50749
    stamp = datetime.datetime(2005, 1, 1, 10, 30, 23, 540000)
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": stamp,
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    # numpy function -> (expected exception type or None, match regex)
    expectations = {
        np.sum: (TypeError, "datetime64 type does not support sum operations"),
        np.mean: (None, ""),
    }
    exc_type, pattern = expectations[groupby_func_np]

    if exc_type is None:
        getattr(grouped, how)(groupby_func_np)
        return

    with pytest.raises(exc_type, match=pattern):
        getattr(grouped, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category(
    how, by, groupby_series, groupby_func, using_copy_on_write
):
    """
    Run each groupby function on a frame whose column "d" is categorical.

    The function is invoked directly, through ``agg`` or through
    ``transform`` (per ``how``) and must either succeed or raise the
    exception/message recorded in the expectation table below.
    """
    # GH#50749
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": Categorical(
                ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            ),
        }
    )
    args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby(by=by)

    if groupby_series:
        grouped = grouped["d"]

        if groupby_func == "corrwith":
            # nothing to call in this case: only check the attribute is absent
            assert not hasattr(grouped, "corrwith")
            return

    if using_copy_on_write:
        # no-op with CoW
        fillna_expected = (None, "")
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    # groupby function name -> (expected exception type(s) or None, match regex)
    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(category type does not support cummax operations|"
            "category dtype not supported|"
            "cummax is not supported for category dtype)",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(category type does not support cummin operations|"
            "category dtype not supported|"
            "cummin is not supported for category dtype)",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(category type does not support cumprod operations|"
            "category dtype not supported|"
            "cumprod is not supported for category dtype)",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(category type does not support cumsum operations|"
            "category dtype not supported|"
            "cumsum is not supported for category dtype)",
        ),
        "diff": (
            TypeError,
            r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
        ),
        "ffill": (None, ""),
        "fillna": fillna_expected,
        "first": (None, ""),
        "idxmax": (None, ""),
        "idxmin": (None, ""),
        "last": (None, ""),
        "max": (None, ""),
        "mean": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
        "median": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'median'",
        ),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (
            TypeError,
            r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'",
        ),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, "No matching signature found"),
        "rank": (None, ""),
        "sem": (ValueError, "Cannot cast object dtype to float64"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'skew'",
        ),
        "std": (ValueError, "Cannot cast object dtype to float64"),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'var'",
        ),
    }
    exc_type, pattern = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg, or transform.
        if how == "method":
            return getattr(grouped, groupby_func)(*args)
        if how == "agg":
            return grouped.agg(groupby_func, *args)
        return grouped.transform(groupby_func, *args)

    if exc_type is None:
        # No error expected: the call just has to complete.
        run_op()
        return

    with pytest.raises(exc_type, match=pattern):
        run_op()
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_category_udf(how, by, groupby_series):
    """A TypeError raised inside a UDF surfaces from ``agg``/``transform``."""
    # GH#50749
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": Categorical(
                ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            ),
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    def func(x):
        # Fails unconditionally so the test can look for this exact message.
        raise TypeError("Test error message")

    apply_udf = getattr(grouped, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np):
    """Pass ``np.sum``/``np.mean`` to ``agg``/``transform`` on categorical data."""
    # GH#50749
    frame = DataFrame(
        {
            "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": Categorical(
                ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            ),
        }
    )
    grouped = frame.groupby(by=by)
    if groupby_series:
        grouped = grouped["d"]

    # numpy function -> (expected exception type or None, match regex)
    expectations = {
        np.sum: (TypeError, "category type does not support sum operations"),
        np.mean: (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
    }
    exc_type, pattern = expectations[groupby_func_np]

    if exc_type is None:
        getattr(grouped, how)(groupby_func_np)
        return

    with pytest.raises(exc_type, match=pattern):
        getattr(grouped, how)(groupby_func_np)
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category_on_category(
    how, by, groupby_series, groupby_func, observed, using_copy_on_write
):
    """
    Run each groupby function with categorical columns "a" and "d".

    The frame is grouped with the given ``observed`` setting; the function
    is invoked directly, through ``agg`` or through ``transform`` (per
    ``how``) and must either succeed or raise the exception/message
    recorded in the expectation table below.
    """
    # GH#50749
    frame = DataFrame(
        {
            "a": Categorical(
                ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            ),
            "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
            "c": range(9),
            "d": Categorical(
                ["a", "a", "a", "a", "b", "b", "c", "c", "c"],
                categories=["a", "b", "c", "d"],
                ordered=True,
            ),
        }
    )
    args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby(by=by, observed=observed)

    if groupby_series:
        grouped = grouped["d"]

        if groupby_func == "corrwith":
            # nothing to call in this case: only check the attribute is absent
            assert not hasattr(grouped, "corrwith")
            return

    # idxmax/idxmin are expected to fail only when some group has no rows
    has_empty_group = any(group.empty for group in grouped.groups.values())
    if has_empty_group:
        idxmax_expected = (ValueError, "attempt to get argmax of an empty sequence")
        idxmin_expected = (ValueError, "attempt to get argmin of an empty sequence")
    else:
        idxmax_expected = (None, "")
        idxmin_expected = (None, "")

    if using_copy_on_write:
        # no-op with CoW
        fillna_expected = (None, "")
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    # groupby function name -> (expected exception type(s) or None, match regex)
    expectations = {
        "all": (None, ""),
        "any": (None, ""),
        "bfill": (None, ""),
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": (None, ""),
        "cumcount": (None, ""),
        "cummax": (
            (NotImplementedError, TypeError),
            "(cummax is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummax operations)",
        ),
        "cummin": (
            (NotImplementedError, TypeError),
            "(cummin is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummin operations)",
        ),
        "cumprod": (
            (NotImplementedError, TypeError),
            "(cumprod is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumprod operations)",
        ),
        "cumsum": (
            (NotImplementedError, TypeError),
            "(cumsum is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumsum operations)",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": (None, ""),
        "fillna": fillna_expected,
        "first": (None, ""),
        "idxmax": idxmax_expected,
        "idxmin": idxmin_expected,
        "last": (None, ""),
        "max": (None, ""),
        "mean": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
        "median": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'median'",
        ),
        "min": (None, ""),
        "ngroup": (None, ""),
        "nunique": (None, ""),
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, ""),
        "rank": (None, ""),
        "sem": (ValueError, "Cannot cast object dtype to float64"),
        "shift": (None, ""),
        "size": (None, ""),
        "skew": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'skew'",
        ),
        "std": (ValueError, "Cannot cast object dtype to float64"),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'var'",
        ),
    }
    exc_type, pattern = expectations[groupby_func]

    def run_op():
        # Dispatch on ``how``: direct method call, agg, or transform.
        if how == "method":
            return getattr(grouped, groupby_func)(*args)
        if how == "agg":
            return grouped.agg(groupby_func, *args)
        return grouped.transform(groupby_func, *args)

    if exc_type is None:
        # No error expected: the call just has to complete.
        run_op()
        return

    with pytest.raises(exc_type, match=pattern):
        run_op()
def test_subsetting_columns_axis_1_raises():
    """Selecting a column from an ``axis=1`` groupby raises ``ValueError``."""
    # GH 35443
    frame = DataFrame({"a": [1], "b": [2], "c": [3]})
    grouped = frame.groupby("a", axis=1)
    expected_msg = "Cannot subset columns when using axis=1"
    with pytest.raises(ValueError, match=expected_msg):
        grouped["b"]
|