123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142 |
- """
- Test the consistency of the groupby API, both internally and with other pandas objects.
- """
- import inspect
- import pytest
- from pandas import (
- DataFrame,
- Series,
- )
- from pandas.core.groupby.generic import (
- DataFrameGroupBy,
- SeriesGroupBy,
- )
- def test_frame_consistency(request, groupby_func):
- # GH#48028
- if groupby_func in ("first", "last"):
- msg = "first and last are entirely different between frame and groupby"
- request.node.add_marker(pytest.mark.xfail(reason=msg))
- if groupby_func in ("cumcount",):
- msg = "DataFrame has no such method"
- request.node.add_marker(pytest.mark.xfail(reason=msg))
- if groupby_func == "ngroup":
- assert not hasattr(DataFrame, groupby_func)
- return
- frame_method = getattr(DataFrame, groupby_func)
- gb_method = getattr(DataFrameGroupBy, groupby_func)
- result = set(inspect.signature(gb_method).parameters)
- if groupby_func == "size":
- # "size" is a method on GroupBy but property on DataFrame:
- expected = {"self"}
- else:
- expected = set(inspect.signature(frame_method).parameters)
- # Exclude certain arguments from result and expected depending on the operation
- # Some of these may be purposeful inconsistencies between the APIs
- exclude_expected, exclude_result = set(), set()
- if groupby_func in ("any", "all"):
- exclude_expected = {"kwargs", "bool_only", "axis"}
- elif groupby_func in ("count",):
- exclude_expected = {"numeric_only", "axis"}
- elif groupby_func in ("nunique",):
- exclude_expected = {"axis"}
- elif groupby_func in ("max", "min"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- exclude_result = {"min_count", "engine", "engine_kwargs"}
- elif groupby_func in ("mean", "std", "sum", "var"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- exclude_result = {"engine", "engine_kwargs"}
- elif groupby_func in ("median", "prod", "sem"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
- exclude_expected = {"downcast", "inplace", "axis"}
- elif groupby_func in ("cummax", "cummin"):
- exclude_expected = {"skipna", "args"}
- exclude_result = {"numeric_only"}
- elif groupby_func in ("cumprod", "cumsum"):
- exclude_expected = {"skipna"}
- elif groupby_func in ("pct_change",):
- exclude_expected = {"kwargs"}
- exclude_result = {"axis"}
- elif groupby_func in ("rank",):
- exclude_expected = {"numeric_only"}
- elif groupby_func in ("quantile",):
- exclude_expected = {"method", "axis"}
- # Ensure excluded arguments are actually in the signatures
- assert result & exclude_result == exclude_result
- assert expected & exclude_expected == exclude_expected
- result -= exclude_result
- expected -= exclude_expected
- assert result == expected
- def test_series_consistency(request, groupby_func):
- # GH#48028
- if groupby_func in ("first", "last"):
- msg = "first and last are entirely different between Series and groupby"
- request.node.add_marker(pytest.mark.xfail(reason=msg))
- if groupby_func in ("cumcount", "corrwith"):
- msg = "Series has no such method"
- request.node.add_marker(pytest.mark.xfail(reason=msg))
- if groupby_func == "ngroup":
- assert not hasattr(Series, groupby_func)
- return
- series_method = getattr(Series, groupby_func)
- gb_method = getattr(SeriesGroupBy, groupby_func)
- result = set(inspect.signature(gb_method).parameters)
- if groupby_func == "size":
- # "size" is a method on GroupBy but property on Series
- expected = {"self"}
- else:
- expected = set(inspect.signature(series_method).parameters)
- # Exclude certain arguments from result and expected depending on the operation
- # Some of these may be purposeful inconsistencies between the APIs
- exclude_expected, exclude_result = set(), set()
- if groupby_func in ("any", "all"):
- exclude_expected = {"kwargs", "bool_only", "axis"}
- elif groupby_func in ("diff",):
- exclude_result = {"axis"}
- elif groupby_func in ("max", "min"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- exclude_result = {"min_count", "engine", "engine_kwargs"}
- elif groupby_func in ("mean", "std", "sum", "var"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- exclude_result = {"engine", "engine_kwargs"}
- elif groupby_func in ("median", "prod", "sem"):
- exclude_expected = {"axis", "kwargs", "skipna"}
- elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
- exclude_expected = {"downcast", "inplace", "axis"}
- elif groupby_func in ("cummax", "cummin"):
- exclude_expected = {"skipna", "args"}
- exclude_result = {"numeric_only"}
- elif groupby_func in ("cumprod", "cumsum"):
- exclude_expected = {"skipna"}
- elif groupby_func in ("pct_change",):
- exclude_expected = {"kwargs"}
- exclude_result = {"axis"}
- elif groupby_func in ("rank",):
- exclude_expected = {"numeric_only"}
- elif groupby_func in ("idxmin", "idxmax"):
- exclude_expected = {"args", "kwargs"}
- elif groupby_func in ("quantile",):
- exclude_result = {"numeric_only"}
- # Ensure excluded arguments are actually in the signatures
- assert result & exclude_result == exclude_result
- assert expected & exclude_expected == exclude_expected
- result -= exclude_result
- expected -= exclude_expected
- assert result == expected
|