test_size.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    PeriodIndex,
    Series,
)
import pandas._testing as tm
from pandas.api.types import is_integer_dtype
  10. @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
  11. def test_size(df, by):
  12. grouped = df.groupby(by=by)
  13. result = grouped.size()
  14. for key, group in grouped:
  15. assert result[key] == len(group)
  16. @pytest.mark.parametrize(
  17. "by",
  18. [
  19. [0, 0, 0, 0],
  20. [0, 1, 1, 1],
  21. [1, 0, 1, 1],
  22. [0, None, None, None],
  23. pytest.param([None, None, None, None], marks=pytest.mark.xfail),
  24. ],
  25. )
  26. def test_size_axis_1(df, axis_1, by, sort, dropna):
  27. # GH#45715
  28. counts = {key: sum(value == key for value in by) for key in dict.fromkeys(by)}
  29. if dropna:
  30. counts = {key: value for key, value in counts.items() if key is not None}
  31. expected = Series(counts, dtype="int64")
  32. if sort:
  33. expected = expected.sort_index()
  34. if tm.is_integer_dtype(expected.index) and not any(x is None for x in by):
  35. expected.index = expected.index.astype(np.int_)
  36. grouped = df.groupby(by=by, axis=axis_1, sort=sort, dropna=dropna)
  37. result = grouped.size()
  38. tm.assert_series_equal(result, expected)
  39. @pytest.mark.parametrize("by", ["A", "B", ["A", "B"]])
  40. @pytest.mark.parametrize("sort", [True, False])
  41. def test_size_sort(sort, by):
  42. df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC"))
  43. left = df.groupby(by=by, sort=sort).size()
  44. right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0])
  45. tm.assert_series_equal(left, right, check_names=False)
  46. def test_size_series_dataframe():
  47. # https://github.com/pandas-dev/pandas/issues/11699
  48. df = DataFrame(columns=["A", "B"])
  49. out = Series(dtype="int64", index=Index([], name="A"))
  50. tm.assert_series_equal(df.groupby("A").size(), out)
  51. def test_size_groupby_all_null():
  52. # https://github.com/pandas-dev/pandas/issues/23050
  53. # Assert no 'Value Error : Length of passed values is 2, index implies 0'
  54. df = DataFrame({"A": [None, None]}) # all-null groups
  55. result = df.groupby("A").size()
  56. expected = Series(dtype="int64", index=Index([], name="A"))
  57. tm.assert_series_equal(result, expected)
  58. def test_size_period_index():
  59. # https://github.com/pandas-dev/pandas/issues/34010
  60. ser = Series([1], index=PeriodIndex(["2000"], name="A", freq="D"))
  61. grp = ser.groupby(level="A")
  62. result = grp.size()
  63. tm.assert_series_equal(result, ser)
  64. @pytest.mark.parametrize("as_index", [True, False])
  65. def test_size_on_categorical(as_index):
  66. df = DataFrame([[1, 1], [2, 2]], columns=["A", "B"])
  67. df["A"] = df["A"].astype("category")
  68. result = df.groupby(["A", "B"], as_index=as_index).size()
  69. expected = DataFrame(
  70. [[1, 1, 1], [1, 2, 0], [2, 1, 0], [2, 2, 1]], columns=["A", "B", "size"]
  71. )
  72. expected["A"] = expected["A"].astype("category")
  73. if as_index:
  74. expected = expected.set_index(["A", "B"])["size"].rename(None)
  75. tm.assert_equal(result, expected)