# test_api_consistency.py
  1. """
  2. Test the consistency of the groupby API, both internally and with other pandas objects.
  3. """
  4. import inspect
  5. import pytest
  6. from pandas import (
  7. DataFrame,
  8. Series,
  9. )
  10. from pandas.core.groupby.generic import (
  11. DataFrameGroupBy,
  12. SeriesGroupBy,
  13. )
  14. def test_frame_consistency(request, groupby_func):
  15. # GH#48028
  16. if groupby_func in ("first", "last"):
  17. msg = "first and last are entirely different between frame and groupby"
  18. request.node.add_marker(pytest.mark.xfail(reason=msg))
  19. if groupby_func in ("cumcount",):
  20. msg = "DataFrame has no such method"
  21. request.node.add_marker(pytest.mark.xfail(reason=msg))
  22. if groupby_func == "ngroup":
  23. assert not hasattr(DataFrame, groupby_func)
  24. return
  25. frame_method = getattr(DataFrame, groupby_func)
  26. gb_method = getattr(DataFrameGroupBy, groupby_func)
  27. result = set(inspect.signature(gb_method).parameters)
  28. if groupby_func == "size":
  29. # "size" is a method on GroupBy but property on DataFrame:
  30. expected = {"self"}
  31. else:
  32. expected = set(inspect.signature(frame_method).parameters)
  33. # Exclude certain arguments from result and expected depending on the operation
  34. # Some of these may be purposeful inconsistencies between the APIs
  35. exclude_expected, exclude_result = set(), set()
  36. if groupby_func in ("any", "all"):
  37. exclude_expected = {"kwargs", "bool_only", "axis"}
  38. elif groupby_func in ("count",):
  39. exclude_expected = {"numeric_only", "axis"}
  40. elif groupby_func in ("nunique",):
  41. exclude_expected = {"axis"}
  42. elif groupby_func in ("max", "min"):
  43. exclude_expected = {"axis", "kwargs", "skipna"}
  44. exclude_result = {"min_count", "engine", "engine_kwargs"}
  45. elif groupby_func in ("mean", "std", "sum", "var"):
  46. exclude_expected = {"axis", "kwargs", "skipna"}
  47. exclude_result = {"engine", "engine_kwargs"}
  48. elif groupby_func in ("median", "prod", "sem"):
  49. exclude_expected = {"axis", "kwargs", "skipna"}
  50. elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
  51. exclude_expected = {"downcast", "inplace", "axis"}
  52. elif groupby_func in ("cummax", "cummin"):
  53. exclude_expected = {"skipna", "args"}
  54. exclude_result = {"numeric_only"}
  55. elif groupby_func in ("cumprod", "cumsum"):
  56. exclude_expected = {"skipna"}
  57. elif groupby_func in ("pct_change",):
  58. exclude_expected = {"kwargs"}
  59. exclude_result = {"axis"}
  60. elif groupby_func in ("rank",):
  61. exclude_expected = {"numeric_only"}
  62. elif groupby_func in ("quantile",):
  63. exclude_expected = {"method", "axis"}
  64. # Ensure excluded arguments are actually in the signatures
  65. assert result & exclude_result == exclude_result
  66. assert expected & exclude_expected == exclude_expected
  67. result -= exclude_result
  68. expected -= exclude_expected
  69. assert result == expected
  70. def test_series_consistency(request, groupby_func):
  71. # GH#48028
  72. if groupby_func in ("first", "last"):
  73. msg = "first and last are entirely different between Series and groupby"
  74. request.node.add_marker(pytest.mark.xfail(reason=msg))
  75. if groupby_func in ("cumcount", "corrwith"):
  76. msg = "Series has no such method"
  77. request.node.add_marker(pytest.mark.xfail(reason=msg))
  78. if groupby_func == "ngroup":
  79. assert not hasattr(Series, groupby_func)
  80. return
  81. series_method = getattr(Series, groupby_func)
  82. gb_method = getattr(SeriesGroupBy, groupby_func)
  83. result = set(inspect.signature(gb_method).parameters)
  84. if groupby_func == "size":
  85. # "size" is a method on GroupBy but property on Series
  86. expected = {"self"}
  87. else:
  88. expected = set(inspect.signature(series_method).parameters)
  89. # Exclude certain arguments from result and expected depending on the operation
  90. # Some of these may be purposeful inconsistencies between the APIs
  91. exclude_expected, exclude_result = set(), set()
  92. if groupby_func in ("any", "all"):
  93. exclude_expected = {"kwargs", "bool_only", "axis"}
  94. elif groupby_func in ("diff",):
  95. exclude_result = {"axis"}
  96. elif groupby_func in ("max", "min"):
  97. exclude_expected = {"axis", "kwargs", "skipna"}
  98. exclude_result = {"min_count", "engine", "engine_kwargs"}
  99. elif groupby_func in ("mean", "std", "sum", "var"):
  100. exclude_expected = {"axis", "kwargs", "skipna"}
  101. exclude_result = {"engine", "engine_kwargs"}
  102. elif groupby_func in ("median", "prod", "sem"):
  103. exclude_expected = {"axis", "kwargs", "skipna"}
  104. elif groupby_func in ("backfill", "bfill", "ffill", "pad"):
  105. exclude_expected = {"downcast", "inplace", "axis"}
  106. elif groupby_func in ("cummax", "cummin"):
  107. exclude_expected = {"skipna", "args"}
  108. exclude_result = {"numeric_only"}
  109. elif groupby_func in ("cumprod", "cumsum"):
  110. exclude_expected = {"skipna"}
  111. elif groupby_func in ("pct_change",):
  112. exclude_expected = {"kwargs"}
  113. exclude_result = {"axis"}
  114. elif groupby_func in ("rank",):
  115. exclude_expected = {"numeric_only"}
  116. elif groupby_func in ("idxmin", "idxmax"):
  117. exclude_expected = {"args", "kwargs"}
  118. elif groupby_func in ("quantile",):
  119. exclude_result = {"numeric_only"}
  120. # Ensure excluded arguments are actually in the signatures
  121. assert result & exclude_result == exclude_result
  122. assert expected & exclude_expected == exclude_expected
  123. result -= exclude_result
  124. expected -= exclude_expected
  125. assert result == expected