| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
7037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144 | 
							- """
 
- these are systematically testing all of the args to value_counts
 
- with different size combinations. This is to ensure stability of the sorting
 
- and proper parameter handling
 
- """
 
- from itertools import product
 
- import numpy as np
 
- import pytest
 
- from pandas import (
 
-     Categorical,
 
-     CategoricalIndex,
 
-     DataFrame,
 
-     Grouper,
 
-     Index,
 
-     MultiIndex,
 
-     Series,
 
-     date_range,
 
-     to_datetime,
 
- )
 
- import pandas._testing as tm
 
- from pandas.util.version import Version
 
def tests_value_counts_index_names_category_column():
    # GH44324: the category column's name must survive into the result index.
    frame = DataFrame({"gender": ["female"], "country": ["US"]})
    frame["gender"] = frame["gender"].astype("category")

    result = frame.groupby("country")["gender"].value_counts()

    # Build the expected MultiIndex explicitly, preserving the categorical dtype.
    index_frame = DataFrame([["US", "female"]], columns=["country", "gender"])
    index_frame["gender"] = index_frame["gender"].astype("category")
    expected = Series([1], index=MultiIndex.from_frame(index_frame), name="count")
    tm.assert_series_equal(result, expected)
 
# our starting frame
def seed_df(seed_nans, n, m):
    """Build an n-row frame of letters/dates/ints; optionally seed NaNs.

    NaNs are placed at fixed strided positions so results are reproducible.
    """
    np.random.seed(1234)
    days = date_range("2015-08-24", periods=10)
    frame = DataFrame(
        {
            "1st": np.random.choice(list("abcd"), n),
            "2nd": np.random.choice(days, n),
            "3rd": np.random.randint(1, m + 1, n),
        }
    )
    if seed_nans:
        # Cast to float first so assigning NaN does not trigger an implicit cast.
        frame["3rd"] = frame["3rd"].astype("float")
        frame.loc[1::11, "1st"] = np.nan
        frame.loc[3::17, "2nd"] = np.nan
        for start in (7, 8, 9):
            frame.loc[start::19, "3rd"] = np.nan
    return frame
 
# Build the parametrization inputs: frames, grouping keys, and bin specs.
binned = []
ids = []
for seed_nans in [True, False]:
    for n, m in product((100, 1000), (5, 20)):
        frame = seed_df(seed_nans, n, m)
        bin_options = (None, np.arange(0, max(5, frame["3rd"].max()) + 1, 2))
        key_options = ("1st", "2nd", ["1st", "2nd"])
        for key, bin_spec in product(key_options, bin_options):
            binned.append((frame, key, bin_spec, n, m))
            ids.append(f"{key}-{n}-{m}")
 
@pytest.mark.slow
@pytest.mark.parametrize("df, keys, bins, n, m", binned, ids=ids)
@pytest.mark.parametrize("isort", [True, False])
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
@pytest.mark.parametrize("sort", [True, False])
@pytest.mark.parametrize("ascending", [True, False])
@pytest.mark.parametrize("dropna", [True, False])
def test_series_groupby_value_counts(
    df, keys, bins, n, m, isort, normalize, name, sort, ascending, dropna
):
    # Compare SeriesGroupBy.value_counts against apply(Series.value_counts).
    def rebuild_index(frame):
        # Re-materialize the MultiIndex so both sides compare structurally.
        levels = [frame.index.get_level_values(i) for i in range(frame.index.nlevels)]
        frame.index = MultiIndex.from_arrays(levels, names=frame.index.names)
        return frame

    vc_kwargs = {
        "normalize": normalize,
        "sort": sort,
        "ascending": ascending,
        "dropna": dropna,
        "bins": bins,
    }
    left = df.groupby(keys, sort=isort)["3rd"].value_counts(**vc_kwargs)
    right = df.groupby(keys, sort=isort)["3rd"].apply(Series.value_counts, **vc_kwargs)
    right.index.names = right.index.names[:-1] + ["3rd"]
    # https://github.com/pandas-dev/pandas/issues/49909
    right = right.rename(name)

    # Have to sort on the index: sorting on values is unstable (xref GH9212).
    left = rebuild_index(left)
    right = rebuild_index(right)
    tm.assert_series_equal(left.sort_index(), right.sort_index())
 
@pytest.mark.parametrize("utc", [True, False])
def test_series_groupby_value_counts_with_grouper(utc):
    # GH28479: value_counts must work under a time-based Grouper.
    base = 1565083561
    df = DataFrame(
        {
            "Timestamp": [
                base,
                base + 86400,
                base + 86500,
                base + 86400 * 2,
                base + 86400 * 3,
                base + 86500 * 3,
                base + 86400 * 4,
            ],
            "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"],
        }
    ).drop([3])

    df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s")
    grouped = df.groupby(Grouper(freq="1D", key="Datetime"))

    # Have to sort on the index: sorting on values is unstable (xref GH9212).
    result = grouped["Food"].value_counts().sort_index()
    expected = grouped["Food"].apply(Series.value_counts).sort_index()
    expected.index.names = result.index.names
    # https://github.com/pandas-dev/pandas/issues/49909
    expected = expected.rename("count")
    tm.assert_series_equal(result, expected)
 
@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
def test_series_groupby_value_counts_empty(columns):
    # GH39172: an all-empty frame yields an empty, properly-named result.
    df = DataFrame(columns=columns)
    grouped = df.groupby(columns[:-1])
    result = grouped[columns[-1]].value_counts()

    empty_index = MultiIndex.from_arrays([[]] * len(columns), names=columns)
    expected = Series([], dtype=result.dtype, name="count", index=empty_index)
    tm.assert_series_equal(result, expected)
 
@pytest.mark.parametrize("columns", [["A", "B"], ["A", "B", "C"]])
def test_series_groupby_value_counts_one_row(columns):
    # GH42618: a single row groups to itself, matching the frame-level count.
    df = DataFrame(data=[range(len(columns))], columns=columns)
    grouped = df.groupby(columns[:-1])
    result = grouped[columns[-1]].value_counts()
    tm.assert_series_equal(result, df.value_counts())
 
def test_series_groupby_value_counts_on_categorical():
    # GH38672: unobserved categories still appear in the result with count 0.
    ser = Series(Categorical(["a"], categories=["a", "b"]))
    result = ser.groupby([0]).value_counts()

    expected_index = MultiIndex.from_arrays(
        [
            np.array([0, 0]),
            CategoricalIndex(
                ["a", "b"], categories=["a", "b"], ordered=False, dtype="category"
            ),
        ]
    )
    # Expected:
    # 0  a    1
    #    b    0
    # dtype: int64
    expected = Series(data=[1, 0], index=expected_index, name="count")
    tm.assert_series_equal(result, expected)
 
def test_series_groupby_value_counts_no_sort():
    # GH#50482: sort=False must preserve order of first appearance.
    df = DataFrame(
        {
            "gender": ["male", "male", "female", "male", "female", "male"],
            "education": ["low", "medium", "high", "low", "high", "low"],
            "country": ["US", "FR", "US", "FR", "FR", "FR"],
        }
    )
    grouped = df.groupby(["country", "gender"], sort=False)["education"]
    result = grouped.value_counts(sort=False)

    expected_index = MultiIndex(
        levels=[["US", "FR"], ["male", "female"], ["low", "medium", "high"]],
        codes=[[0, 1, 0, 1, 1], [0, 0, 1, 0, 1], [0, 1, 2, 0, 2]],
        names=["country", "gender", "education"],
    )
    expected = Series([1, 1, 1, 2, 1], index=expected_index, name="count")
    tm.assert_series_equal(result, expected)
 
@pytest.fixture
def education_df():
    """Six-row gender/education/country frame shared across tests."""
    records = zip(
        ["male", "male", "female", "male", "female", "male"],
        ["low", "medium", "high", "low", "high", "low"],
        ["US", "FR", "US", "FR", "FR", "FR"],
    )
    return DataFrame(records, columns=["gender", "education", "country"])
 
def test_axis(education_df):
    # value_counts is only implemented for axis=0 groupbys.
    grouped = education_df.groupby("country", axis=1)
    with pytest.raises(NotImplementedError, match="axis"):
        grouped.value_counts()
 
def test_bad_subset(education_df):
    # The subset may not overlap the grouping keys.
    grouped = education_df.groupby("country")
    with pytest.raises(ValueError, match="subset"):
        grouped.value_counts(subset=["country"])
 
def test_basic(education_df, request):
    # gh43564
    if Version(np.__version__) >= Version("1.25"):
        # Ties in the sorted proportions can come back in either order under
        # numpy>=1.25's AVX-accelerated (unstable) sort.
        # NOTE: the two string fragments previously concatenated to
        # "duplicatesissue"; a separating space was missing.
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )
    result = education_df.groupby("country")[["gender", "education"]].value_counts(
        normalize=True
    )
    expected = Series(
        data=[0.5, 0.25, 0.25, 0.5, 0.5],
        index=MultiIndex.from_tuples(
            [
                ("FR", "male", "low"),
                ("FR", "female", "high"),
                ("FR", "male", "medium"),
                ("US", "female", "high"),
                ("US", "male", "low"),
            ],
            names=["country", "gender", "education"],
        ),
        name="proportion",
    )
    tm.assert_series_equal(result, expected)
 
- def _frame_value_counts(df, keys, normalize, sort, ascending):
 
-     return df[keys].value_counts(normalize=normalize, sort=sort, ascending=ascending)
 
@pytest.mark.parametrize("groupby", ["column", "array", "function"])
@pytest.mark.parametrize("normalize, name", [(True, "proportion"), (False, "count")])
@pytest.mark.parametrize(
    "sort, ascending",
    [
        (False, None),
        (True, True),
        (True, False),
    ],
)
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize("frame", [True, False])
def test_against_frame_and_seriesgroupby(
    education_df, groupby, normalize, name, sort, ascending, as_index, frame, request
):
    # test all parameters:
    # - Use column, array or function as by= parameter
    # - Whether or not to normalize
    # - Whether or not to sort and how
    # - Whether or not to use the groupby as an index
    # - 3-way compare against:
    #   - apply with :meth:`~DataFrame.value_counts`
    #   - `~SeriesGroupBy.value_counts`
    if Version(np.__version__) >= Version("1.25") and frame and sort and normalize:
        # FIX: the implicit string concatenation was missing a space
        # ("duplicatesissue").
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )
    by = {
        "column": "country",
        "array": education_df["country"].values,
        "function": lambda x: education_df["country"][x] == "US",
    }[groupby]

    gp = education_df.groupby(by=by, as_index=as_index)
    result = gp[["gender", "education"]].value_counts(
        normalize=normalize, sort=sort, ascending=ascending
    )
    if frame:
        # compare against apply with DataFrame value_counts
        expected = gp.apply(
            _frame_value_counts, ["gender", "education"], normalize, sort, ascending
        )

        if as_index:
            tm.assert_series_equal(result, expected)
        else:
            name = "proportion" if normalize else "count"
            expected = expected.reset_index().rename({0: name}, axis=1)
            if groupby == "column":
                expected = expected.rename({"level_0": "country"}, axis=1)
                expected["country"] = np.where(expected["country"], "US", "FR")
            elif groupby == "function":
                expected["level_0"] = expected["level_0"] == 1
            else:
                expected["level_0"] = np.where(expected["level_0"], "US", "FR")
            tm.assert_frame_equal(result, expected)
    else:
        # compare against SeriesGroupBy value_counts
        education_df["both"] = education_df["gender"] + "-" + education_df["education"]
        expected = gp["both"].value_counts(
            normalize=normalize, sort=sort, ascending=ascending
        )
        expected.name = name
        if as_index:
            # Split the combined "gender-education" level back into two levels.
            index_frame = expected.index.to_frame(index=False)
            index_frame["gender"] = index_frame["both"].str.split("-").str.get(0)
            index_frame["education"] = index_frame["both"].str.split("-").str.get(1)
            del index_frame["both"]
            index_frame = index_frame.rename({0: None}, axis=1)
            expected.index = MultiIndex.from_frame(index_frame)
            tm.assert_series_equal(result, expected)
        else:
            expected.insert(1, "gender", expected["both"].str.split("-").str.get(0))
            expected.insert(2, "education", expected["both"].str.split("-").str.get(1))
            del expected["both"]
            tm.assert_frame_equal(result, expected)
 
@pytest.mark.parametrize("normalize", [True, False])
@pytest.mark.parametrize(
    "sort, ascending, expected_rows, expected_count, expected_group_size",
    [
        (False, None, [0, 1, 2, 3, 4], [1, 1, 1, 2, 1], [1, 3, 1, 3, 1]),
        (True, False, [4, 3, 1, 2, 0], [1, 2, 1, 1, 1], [1, 3, 3, 1, 1]),
        (True, True, [4, 1, 3, 2, 0], [1, 1, 2, 1, 1], [1, 3, 3, 1, 1]),
    ],
)
def test_compound(
    education_df,
    normalize,
    sort,
    ascending,
    expected_rows,
    expected_count,
    expected_group_size,
):
    # Multiple groupby keys with as_index=False: result comes back as a frame.
    gp = education_df.groupby(["country", "gender"], as_index=False, sort=False)
    result = gp["education"].value_counts(
        normalize=normalize, sort=sort, ascending=ascending
    )

    # Reconstruct the expected rows from the source frame by position.
    expected = DataFrame(
        {
            column: [education_df[column][row] for row in expected_rows]
            for column in ["country", "gender", "education"]
        }
    )
    if normalize:
        expected["proportion"] = [
            count / size for count, size in zip(expected_count, expected_group_size)
        ]
    else:
        expected["count"] = expected_count
    tm.assert_frame_equal(result, expected)
 
@pytest.fixture
def animals_df():
    """Four animals keyed to one group, with leg/wing counts."""
    data = {
        "key": [1, 1, 1, 1],
        "num_legs": [2, 4, 4, 6],
        "num_wings": [2, 0, 0, 0],
    }
    return DataFrame(data, index=["falcon", "dog", "cat", "ant"])
 
@pytest.mark.parametrize(
    "sort, ascending, normalize, name, expected_data, expected_index",
    [
        (False, None, False, "count", [1, 2, 1], [(1, 1, 1), (2, 4, 6), (2, 0, 0)]),
        (True, True, False, "count", [1, 1, 2], [(1, 1, 1), (2, 6, 4), (2, 0, 0)]),
        (True, False, False, "count", [2, 1, 1], [(1, 1, 1), (4, 2, 6), (0, 2, 0)]),
        (
            True,
            False,
            True,
            "proportion",
            [0.5, 0.25, 0.25],
            [(1, 1, 1), (4, 2, 6), (0, 2, 0)],
        ),
    ],
)
def test_data_frame_value_counts(
    animals_df, sort, ascending, normalize, name, expected_data, expected_index
):
    # 3-way compare with :meth:`~DataFrame.value_counts`
    # Tests from frame/methods/test_value_counts.py
    expected = Series(
        data=expected_data,
        index=MultiIndex.from_arrays(
            expected_index, names=["key", "num_legs", "num_wings"]
        ),
        name=name,
    )

    result_frame = animals_df.value_counts(
        sort=sort, ascending=ascending, normalize=normalize
    )
    tm.assert_series_equal(result_frame, expected)

    result_frame_groupby = animals_df.groupby("key").value_counts(
        sort=sort, ascending=ascending, normalize=normalize
    )
    tm.assert_series_equal(result_frame_groupby, expected)
 
@pytest.fixture
def nulls_df():
    """Frame with NaNs both in grouping columns (A, B) and value columns (C, D)."""
    nan = np.nan
    return DataFrame(
        {
            "A": [1, 1, nan, 4, nan, 6, 6, 6, 6],
            "B": [1, 1, 3, nan, nan, 6, 6, 6, 6],
            "C": [1, 2, 3, 4, 5, 6, nan, 8, nan],
            "D": [1, 2, 3, 4, 5, 6, 7, nan, nan],
        }
    )
 
@pytest.mark.parametrize(
    "group_dropna, count_dropna, expected_rows, expected_values",
    [
        (
            False,
            False,
            [0, 1, 3, 5, 7, 6, 8, 2, 4],
            [0.5, 0.5, 1.0, 0.25, 0.25, 0.25, 0.25, 1.0, 1.0],
        ),
        (False, True, [0, 1, 3, 5, 2, 4], [0.5, 0.5, 1.0, 1.0, 1.0, 1.0]),
        (True, False, [0, 1, 5, 7, 6, 8], [0.5, 0.5, 0.25, 0.25, 0.25, 0.25]),
        (True, True, [0, 1, 5], [0.5, 0.5, 1.0]),
    ],
)
def test_dropna_combinations(
    nulls_df, group_dropna, count_dropna, expected_rows, expected_values, request
):
    # Check every combination of dropna on the groupby vs. on value_counts.
    if Version(np.__version__) >= Version("1.25") and not group_dropna:
        # FIX: the implicit string concatenation was missing a space
        # ("duplicatesissue").
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )
    gp = nulls_df.groupby(["A", "B"], dropna=group_dropna)
    result = gp.value_counts(normalize=True, sort=True, dropna=count_dropna)

    # Reconstruct the expected index from the source rows by position.
    columns = DataFrame()
    for column in nulls_df.columns:
        columns[column] = [nulls_df[column][row] for row in expected_rows]
    index = MultiIndex.from_frame(columns)
    expected = Series(data=expected_values, index=index, name="proportion")
    tm.assert_series_equal(result, expected)
 
@pytest.fixture
def names_with_nulls_df(nulls_fixture):
    """Single-group name frame whose middle_name column contains nulls."""
    return DataFrame(
        {
            "key": [1, 1, 1, 1],
            "first_name": ["John", "Anne", "John", "Beth"],
            "middle_name": ["Smith", nulls_fixture, nulls_fixture, "Louise"],
        },
    )
 
@pytest.mark.parametrize(
    "dropna, expected_data, expected_index",
    [
        (
            True,
            [1, 1],
            MultiIndex.from_arrays(
                [(1, 1), ("Beth", "John"), ("Louise", "Smith")],
                names=["key", "first_name", "middle_name"],
            ),
        ),
        (
            False,
            [1, 1, 1, 1],
            MultiIndex(
                levels=[
                    Index([1]),
                    Index(["Anne", "Beth", "John"]),
                    Index(["Louise", "Smith", np.nan]),
                ],
                codes=[[0, 0, 0, 0], [0, 1, 2, 2], [2, 0, 1, 2]],
                names=["key", "first_name", "middle_name"],
            ),
        ),
    ],
)
@pytest.mark.parametrize("normalize, name", [(False, "count"), (True, "proportion")])
def test_data_frame_value_counts_dropna(
    names_with_nulls_df, dropna, normalize, name, expected_data, expected_index
):
    # GH 41334
    # 3-way compare with :meth:`~DataFrame.value_counts`
    # Tests with nulls from frame/methods/test_value_counts.py
    expected = Series(data=expected_data, index=expected_index, name=name)
    if normalize:
        expected /= float(len(expected_data))

    result_frame = names_with_nulls_df.value_counts(dropna=dropna, normalize=normalize)
    tm.assert_series_equal(result_frame, expected)

    result_frame_groupby = names_with_nulls_df.groupby("key").value_counts(
        dropna=dropna, normalize=normalize
    )
    tm.assert_series_equal(result_frame_groupby, expected)
 
@pytest.mark.parametrize("as_index", [False, True])
@pytest.mark.parametrize("observed", [False, True])
@pytest.mark.parametrize(
    "normalize, name, expected_data",
    [
        (
            False,
            "count",
            np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64),
        ),
        (
            True,
            "proportion",
            np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
        ),
    ],
)
def test_categorical_single_grouper_with_only_observed_categories(
    education_df, as_index, observed, normalize, name, expected_data, request
):
    # Test single categorical grouper with only observed grouping categories
    # when non-groupers are also categorical
    if Version(np.__version__) >= Version("1.25"):
        # FIX: the implicit string concatenation was missing a space
        # ("duplicatesissue").
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )

    gp = education_df.astype("category").groupby(
        "country", as_index=as_index, observed=observed
    )
    result = gp.value_counts(normalize=normalize)

    expected_index = MultiIndex.from_tuples(
        [
            ("FR", "male", "low"),
            ("FR", "female", "high"),
            ("FR", "male", "medium"),
            ("FR", "female", "low"),
            ("FR", "female", "medium"),
            ("FR", "male", "high"),
            ("US", "female", "high"),
            ("US", "male", "low"),
            ("US", "female", "low"),
            ("US", "female", "medium"),
            ("US", "male", "high"),
            ("US", "male", "medium"),
        ],
        names=["country", "gender", "education"],
    )
    expected_series = Series(
        data=expected_data,
        index=expected_index,
        name=name,
    )
    # All three index levels come back categorical because the frame was cast.
    for i in range(3):
        expected_series.index = expected_series.index.set_levels(
            CategoricalIndex(expected_series.index.levels[i]), level=i
        )

    if as_index:
        tm.assert_series_equal(result, expected_series)
    else:
        expected = expected_series.reset_index(
            name="proportion" if normalize else "count"
        )
        tm.assert_frame_equal(result, expected)
 
def assert_categorical_single_grouper(
    education_df, as_index, observed, expected_index, normalize, name, expected_data
):
    """Shared check: single categorical grouper, categorical non-groupers."""
    education_df = education_df.copy().astype("category")

    # Add a grouping category that never appears in the data.
    education_df["country"] = education_df["country"].cat.add_categories(["ASIA"])
    gp = education_df.groupby("country", as_index=as_index, observed=observed)
    result = gp.value_counts(normalize=normalize)

    expected_series = Series(
        data=expected_data,
        index=MultiIndex.from_tuples(
            expected_index,
            names=["country", "gender", "education"],
        ),
        name=name,
    )
    for level in range(3):
        categorical_level = CategoricalIndex(expected_series.index.levels[level])
        if level == 0:
            # The grouper level keeps the full category set, including "ASIA".
            categorical_level = categorical_level.set_categories(
                education_df["country"].cat.categories
            )
        expected_series.index = expected_series.index.set_levels(
            categorical_level, level=level
        )

    if as_index:
        tm.assert_series_equal(result, expected_series)
    else:
        tm.assert_frame_equal(result, expected_series.reset_index(name=name))
 
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, name, expected_data",
    [
        (
            False,
            "count",
            np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64),
        ),
        (
            True,
            "proportion",
            np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
        ),
    ],
)
def test_categorical_single_grouper_observed_true(
    education_df, as_index, normalize, name, expected_data, request
):
    # GH#46357
    if Version(np.__version__) >= Version("1.25"):
        # FIX: the implicit string concatenation was missing a space
        # ("duplicatesissue").
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )
    # observed=True: the unobserved "ASIA" category is excluded from the result.
    expected_index = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "female", "medium"),
        ("FR", "male", "high"),
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "female", "low"),
        ("US", "female", "medium"),
        ("US", "male", "high"),
        ("US", "male", "medium"),
    ]
    assert_categorical_single_grouper(
        education_df=education_df,
        as_index=as_index,
        observed=True,
        expected_index=expected_index,
        normalize=normalize,
        name=name,
        expected_data=expected_data,
    )
 
@pytest.mark.parametrize("as_index", [True, False])
@pytest.mark.parametrize(
    "normalize, name, expected_data",
    [
        (
            False,
            "count",
            np.array(
                [2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=np.int64
            ),
        ),
        (
            True,
            "proportion",
            np.array(
                [
                    0.5,
                    0.25,
                    0.25,
                    0.0,
                    0.0,
                    0.0,
                    0.5,
                    0.5,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                    0.0,
                ]
            ),
        ),
    ],
)
def test_categorical_single_grouper_observed_false(
    education_df, as_index, normalize, name, expected_data, request
):
    # GH#46357
    if Version(np.__version__) >= Version("1.25"):
        # FIX: the implicit string concatenation was missing a space
        # ("duplicatesissue").
        request.node.add_marker(
            pytest.mark.xfail(
                reason=(
                    "pandas default unstable sorting of duplicates "
                    "issue with numpy>=1.25 with AVX instructions"
                ),
                strict=False,
            )
        )
    # observed=False: the unobserved "ASIA" category shows up with zero counts.
    expected_index = [
        ("FR", "male", "low"),
        ("FR", "female", "high"),
        ("FR", "male", "medium"),
        ("FR", "female", "low"),
        ("FR", "male", "high"),
        ("FR", "female", "medium"),
        ("US", "female", "high"),
        ("US", "male", "low"),
        ("US", "male", "medium"),
        ("US", "male", "high"),
        ("US", "female", "medium"),
        ("US", "female", "low"),
        ("ASIA", "male", "low"),
        ("ASIA", "male", "high"),
        ("ASIA", "female", "medium"),
        ("ASIA", "female", "low"),
        ("ASIA", "female", "high"),
        ("ASIA", "male", "medium"),
    ]
    assert_categorical_single_grouper(
        education_df=education_df,
        as_index=as_index,
        observed=False,
        expected_index=expected_index,
        normalize=normalize,
        name=name,
        expected_data=expected_data,
    )
 
- @pytest.mark.parametrize("as_index", [True, False])
 
- @pytest.mark.parametrize(
 
-     "observed, expected_index",
 
-     [
 
-         (
 
-             False,
 
-             [
 
-                 ("FR", "high", "female"),
 
-                 ("FR", "high", "male"),
 
-                 ("FR", "low", "male"),
 
-                 ("FR", "low", "female"),
 
-                 ("FR", "medium", "male"),
 
-                 ("FR", "medium", "female"),
 
-                 ("US", "high", "female"),
 
-                 ("US", "high", "male"),
 
-                 ("US", "low", "male"),
 
-                 ("US", "low", "female"),
 
-                 ("US", "medium", "female"),
 
-                 ("US", "medium", "male"),
 
-             ],
 
-         ),
 
-         (
 
-             True,
 
-             [
 
-                 ("FR", "high", "female"),
 
-                 ("FR", "low", "male"),
 
-                 ("FR", "medium", "male"),
 
-                 ("US", "high", "female"),
 
-                 ("US", "low", "male"),
 
-             ],
 
-         ),
 
-     ],
 
- )
 
- @pytest.mark.parametrize(
 
-     "normalize, name, expected_data",
 
-     [
 
-         (
 
-             False,
 
-             "count",
 
-             np.array([1, 0, 2, 0, 1, 0, 1, 0, 1, 0, 0, 0], dtype=np.int64),
 
-         ),
 
-         (
 
-             True,
 
-             "proportion",
 
-             # NaN values corresponds to non-observed groups
 
-             np.array([1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0]),
 
-         ),
 
-     ],
 
- )
 
- def test_categorical_multiple_groupers(
 
-     education_df, as_index, observed, expected_index, normalize, name, expected_data
 
- ):
 
-     # GH#46357
 
-     # Test multiple categorical groupers when non-groupers are non-categorical
 
-     education_df = education_df.copy()
 
-     education_df["country"] = education_df["country"].astype("category")
 
-     education_df["education"] = education_df["education"].astype("category")
 
-     gp = education_df.groupby(
 
-         ["country", "education"], as_index=as_index, observed=observed
 
-     )
 
-     result = gp.value_counts(normalize=normalize)
 
-     expected_series = Series(
 
-         data=expected_data[expected_data > 0.0] if observed else expected_data,
 
-         index=MultiIndex.from_tuples(
 
-             expected_index,
 
-             names=["country", "education", "gender"],
 
-         ),
 
-         name=name,
 
-     )
 
-     for i in range(2):
 
-         expected_series.index = expected_series.index.set_levels(
 
-             CategoricalIndex(expected_series.index.levels[i]), level=i
 
-         )
 
-     if as_index:
 
-         tm.assert_series_equal(result, expected_series)
 
-     else:
 
-         expected = expected_series.reset_index(
 
-             name="proportion" if normalize else "count"
 
-         )
 
-         tm.assert_frame_equal(result, expected)
 
- @pytest.mark.parametrize("as_index", [False, True])
 
- @pytest.mark.parametrize("observed", [False, True])
 
- @pytest.mark.parametrize(
 
-     "normalize, name, expected_data",
 
-     [
 
-         (
 
-             False,
 
-             "count",
 
-             np.array([2, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0], dtype=np.int64),
 
-         ),
 
-         (
 
-             True,
 
-             "proportion",
 
-             # NaN values corresponds to non-observed groups
 
-             np.array([0.5, 0.25, 0.25, 0.0, 0.0, 0.0, 0.5, 0.5, 0.0, 0.0, 0.0, 0.0]),
 
-         ),
 
-     ],
 
- )
 
- def test_categorical_non_groupers(
 
-     education_df, as_index, observed, normalize, name, expected_data, request
 
- ):
 
-     # GH#46357 Test non-observed categories are included in the result,
 
-     # regardless of `observed`
 
-     if Version(np.__version__) >= Version("1.25"):
 
-         request.node.add_marker(
 
-             pytest.mark.xfail(
 
-                 reason=(
 
-                     "pandas default unstable sorting of duplicates"
 
-                     "issue with numpy>=1.25 with AVX instructions"
 
-                 ),
 
-                 strict=False,
 
-             )
 
-         )
 
-     education_df = education_df.copy()
 
-     education_df["gender"] = education_df["gender"].astype("category")
 
-     education_df["education"] = education_df["education"].astype("category")
 
-     gp = education_df.groupby("country", as_index=as_index, observed=observed)
 
-     result = gp.value_counts(normalize=normalize)
 
-     expected_index = [
 
-         ("FR", "male", "low"),
 
-         ("FR", "female", "high"),
 
-         ("FR", "male", "medium"),
 
-         ("FR", "female", "low"),
 
-         ("FR", "female", "medium"),
 
-         ("FR", "male", "high"),
 
-         ("US", "female", "high"),
 
-         ("US", "male", "low"),
 
-         ("US", "female", "low"),
 
-         ("US", "female", "medium"),
 
-         ("US", "male", "high"),
 
-         ("US", "male", "medium"),
 
-     ]
 
-     expected_series = Series(
 
-         data=expected_data,
 
-         index=MultiIndex.from_tuples(
 
-             expected_index,
 
-             names=["country", "gender", "education"],
 
-         ),
 
-         name=name,
 
-     )
 
-     for i in range(1, 3):
 
-         expected_series.index = expected_series.index.set_levels(
 
-             CategoricalIndex(expected_series.index.levels[i]), level=i
 
-         )
 
-     if as_index:
 
-         tm.assert_series_equal(result, expected_series)
 
-     else:
 
-         expected = expected_series.reset_index(
 
-             name="proportion" if normalize else "count"
 
-         )
 
-         tm.assert_frame_equal(result, expected)
 
- @pytest.mark.parametrize(
 
-     "normalize, expected_label, expected_values",
 
-     [
 
-         (False, "count", [1, 1, 1]),
 
-         (True, "proportion", [0.5, 0.5, 1.0]),
 
-     ],
 
- )
 
- def test_mixed_groupings(normalize, expected_label, expected_values):
 
-     # Test multiple groupings
 
-     df = DataFrame({"A": [1, 2, 1], "B": [1, 2, 3]})
 
-     gp = df.groupby([[4, 5, 4], "A", lambda i: 7 if i == 1 else 8], as_index=False)
 
-     result = gp.value_counts(sort=True, normalize=normalize)
 
-     expected = DataFrame(
 
-         {
 
-             "level_0": np.array([4, 4, 5], dtype=np.int_),
 
-             "A": [1, 1, 2],
 
-             "level_2": [8, 8, 7],
 
-             "B": [1, 3, 2],
 
-             expected_label: expected_values,
 
-         }
 
-     )
 
-     tm.assert_frame_equal(result, expected)
 
- @pytest.mark.parametrize(
 
-     "test, columns, expected_names",
 
-     [
 
-         ("repeat", list("abbde"), ["a", None, "d", "b", "b", "e"]),
 
-         ("level", list("abcd") + ["level_1"], ["a", None, "d", "b", "c", "level_1"]),
 
-     ],
 
- )
 
- @pytest.mark.parametrize("as_index", [False, True])
 
- def test_column_label_duplicates(test, columns, expected_names, as_index):
 
-     # GH 44992
 
-     # Test for duplicate input column labels and generated duplicate labels
 
-     df = DataFrame([[1, 3, 5, 7, 9], [2, 4, 6, 8, 10]], columns=columns)
 
-     expected_data = [(1, 0, 7, 3, 5, 9), (2, 1, 8, 4, 6, 10)]
 
-     keys = ["a", np.array([0, 1], dtype=np.int64), "d"]
 
-     result = df.groupby(keys, as_index=as_index).value_counts()
 
-     if as_index:
 
-         expected = Series(
 
-             data=(1, 1),
 
-             index=MultiIndex.from_tuples(
 
-                 expected_data,
 
-                 names=expected_names,
 
-             ),
 
-             name="count",
 
-         )
 
-         tm.assert_series_equal(result, expected)
 
-     else:
 
-         expected_data = [list(row) + [1] for row in expected_data]
 
-         expected_columns = list(expected_names)
 
-         expected_columns[1] = "level_1"
 
-         expected_columns.append("count")
 
-         expected = DataFrame(expected_data, columns=expected_columns)
 
-         tm.assert_frame_equal(result, expected)
 
- @pytest.mark.parametrize(
 
-     "normalize, expected_label",
 
-     [
 
-         (False, "count"),
 
-         (True, "proportion"),
 
-     ],
 
- )
 
- def test_result_label_duplicates(normalize, expected_label):
 
-     # Test for result column label duplicating an input column label
 
-     gb = DataFrame([[1, 2, 3]], columns=["a", "b", expected_label]).groupby(
 
-         "a", as_index=False
 
-     )
 
-     msg = f"Column label '{expected_label}' is duplicate of result column"
 
-     with pytest.raises(ValueError, match=msg):
 
-         gb.value_counts(normalize=normalize)
 
- def test_ambiguous_grouping():
 
-     # Test that groupby is not confused by groupings length equal to row count
 
-     df = DataFrame({"a": [1, 1]})
 
-     gb = df.groupby(np.array([1, 1], dtype=np.int64))
 
-     result = gb.value_counts()
 
-     expected = Series(
 
-         [2], index=MultiIndex.from_tuples([[1, 1]], names=[None, "a"]), name="count"
 
-     )
 
-     tm.assert_series_equal(result, expected)
 
- def test_subset_overlaps_gb_key_raises():
 
-     # GH 46383
 
-     df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1])
 
-     msg = "Keys {'c1'} in subset cannot be in the groupby column keys."
 
-     with pytest.raises(ValueError, match=msg):
 
-         df.groupby("c1").value_counts(subset=["c1"])
 
- def test_subset_doesnt_exist_in_frame():
 
-     # GH 46383
 
-     df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1])
 
-     msg = "Keys {'c3'} in subset do not exist in the DataFrame."
 
-     with pytest.raises(ValueError, match=msg):
 
-         df.groupby("c1").value_counts(subset=["c3"])
 
- def test_subset():
 
-     # GH 46383
 
-     df = DataFrame({"c1": ["a", "b", "c"], "c2": ["x", "y", "y"]}, index=[0, 1, 1])
 
-     result = df.groupby(level=0).value_counts(subset=["c2"])
 
-     expected = Series(
 
-         [1, 2],
 
-         index=MultiIndex.from_arrays([[0, 1], ["x", "y"]], names=[None, "c2"]),
 
-         name="count",
 
-     )
 
-     tm.assert_series_equal(result, expected)
 
- def test_subset_duplicate_columns():
 
-     # GH 46383
 
-     df = DataFrame(
 
-         [["a", "x", "x"], ["b", "y", "y"], ["b", "y", "y"]],
 
-         index=[0, 1, 1],
 
-         columns=["c1", "c2", "c2"],
 
-     )
 
-     result = df.groupby(level=0).value_counts(subset=["c2"])
 
-     expected = Series(
 
-         [1, 2],
 
-         index=MultiIndex.from_arrays(
 
-             [[0, 1], ["x", "y"], ["x", "y"]], names=[None, "c2", "c2"]
 
-         ),
 
-         name="count",
 
-     )
 
-     tm.assert_series_equal(result, expected)
 
- @pytest.mark.parametrize("utc", [True, False])
 
- def test_value_counts_time_grouper(utc):
 
-     # GH#50486
 
-     df = DataFrame(
 
-         {
 
-             "Timestamp": [
 
-                 1565083561,
 
-                 1565083561 + 86400,
 
-                 1565083561 + 86500,
 
-                 1565083561 + 86400 * 2,
 
-                 1565083561 + 86400 * 3,
 
-                 1565083561 + 86500 * 3,
 
-                 1565083561 + 86400 * 4,
 
-             ],
 
-             "Food": ["apple", "apple", "banana", "banana", "orange", "orange", "pear"],
 
-         }
 
-     ).drop([3])
 
-     df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s")
 
-     gb = df.groupby(Grouper(freq="1D", key="Datetime"))
 
-     result = gb.value_counts()
 
-     dates = to_datetime(
 
-         ["2019-08-06", "2019-08-07", "2019-08-09", "2019-08-10"], utc=utc
 
-     )
 
-     timestamps = df["Timestamp"].unique()
 
-     index = MultiIndex(
 
-         levels=[dates, timestamps, ["apple", "banana", "orange", "pear"]],
 
-         codes=[[0, 1, 1, 2, 2, 3], range(6), [0, 0, 1, 2, 2, 3]],
 
-         names=["Datetime", "Timestamp", "Food"],
 
-     )
 
-     expected = Series(1, index=index, name="count")
 
-     tm.assert_series_equal(result, expected)
 
 
  |