| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332 | 
# Tests for GroupBy._positional_selector (positional grouped indexing), GH#42864
 
- import random
 
- import numpy as np
 
- import pytest
 
- import pandas as pd
 
- import pandas._testing as tm
 
@pytest.mark.parametrize(
    "arg, expected_rows",
    [
        (0, [0, 1, 4]),
        (2, [5]),
        (5, []),
        (-1, [3, 4, 7]),
        (-2, [1, 6]),
        (-6, []),
    ],
)
def test_int(slice_test_df, slice_test_grouped, arg, expected_rows):
    """Index the positional selector with a single integer.

    The selection made on the grouped fixture must equal the rows of the
    source frame found at the ``expected_rows`` positions.
    """
    selected = slice_test_grouped._positional_selector[arg]
    wanted = slice_test_df.iloc[expected_rows]

    tm.assert_frame_equal(selected, wanted)
 
def test_slice(slice_test_df, slice_test_grouped):
    """Index the positional selector with one ``start:stop:step`` slice."""
    selector = slice_test_grouped._positional_selector
    selected = selector[0:3:2]

    # Positions in the source frame that the slice is expected to pick.
    wanted = slice_test_df.iloc[[0, 1, 4, 5]]
    tm.assert_frame_equal(selected, wanted)
 
@pytest.mark.parametrize(
    "arg, expected_rows",
    [
        ([0, 2], [0, 1, 4, 5]),
        ([0, 2, -1], [0, 1, 3, 4, 5, 7]),
        (range(0, 3, 2), [0, 1, 4, 5]),
        ({0, 2}, [0, 1, 4, 5]),
    ],
    ids=["list", "negative", "range", "set"],
)
def test_list(slice_test_df, slice_test_grouped, arg, expected_rows):
    """Index the positional selector with a list or other integer iterable.

    Covers a plain list, a list containing a negative position, a ``range``
    and a ``set``.
    """
    selected = slice_test_grouped._positional_selector[arg]
    wanted = slice_test_df.iloc[expected_rows]

    tm.assert_frame_equal(selected, wanted)
 
def test_ints(slice_test_df, slice_test_grouped):
    """Index the positional selector with a tuple of integers."""
    positions = (0, 2, -1)
    # Subscripting with a tuple object is the same call as ``[0, 2, -1]``.
    selected = slice_test_grouped._positional_selector[positions]

    wanted = slice_test_df.iloc[[0, 1, 3, 4, 5, 7]]
    tm.assert_frame_equal(selected, wanted)
 
def test_slices(slice_test_df, slice_test_grouped):
    """Index the positional selector with a tuple of slices."""
    selector = slice_test_grouped._positional_selector
    selected = selector[:2, -2:]

    wanted_positions = [0, 1, 2, 3, 4, 6, 7]
    tm.assert_frame_equal(selected, slice_test_df.iloc[wanted_positions])
 
def test_mix(slice_test_df, slice_test_grouped):
    """Index the positional selector with a tuple mixing integers and a slice."""
    selector = slice_test_grouped._positional_selector
    selected = selector[0, 1, -2:]

    wanted_positions = [0, 1, 2, 3, 4, 6, 7]
    tm.assert_frame_equal(selected, slice_test_df.iloc[wanted_positions])
 
@pytest.mark.parametrize(
    "arg, expected_rows",
    [
        (0, [0, 1, 4]),
        ([0, 2, -1], [0, 1, 3, 4, 5, 7]),
        ((slice(None, 2), slice(-2, None)), [0, 1, 2, 3, 4, 6, 7]),
    ],
)
def test_as_index(slice_test_df, arg, expected_rows):
    """Positional selection on a groupby created without passing ``as_index``.

    The grouping is built here (``sort=False``) rather than taken from a
    grouped fixture, so ``as_index`` keeps its default value.
    """
    grouped = slice_test_df.groupby("Group", sort=False)
    selected = grouped._positional_selector[arg]

    wanted = slice_test_df.iloc[expected_rows]
    tm.assert_frame_equal(selected, wanted)
 
def test_doc_examples():
    """Check the two selections used as examples in the documentation."""
    columns = ["A", "B"]
    frame = pd.DataFrame(
        [["a", 1], ["a", 2], ["a", 3], ["b", 4], ["b", 5]], columns=columns
    )
    selector = frame.groupby("A", as_index=False)._positional_selector

    # Slice example: position 1 of every group.
    wanted_slice = pd.DataFrame([["a", 2], ["b", 5]], columns=columns, index=[1, 4])
    tm.assert_frame_equal(selector[1:2], wanted_slice)

    # Tuple example: position 1 and the last position of every group.
    wanted_tuple = pd.DataFrame(
        [["a", 2], ["a", 3], ["b", 5]], columns=columns, index=[1, 2, 4]
    )
    tm.assert_frame_equal(selector[1, -1], wanted_tuple)
 
@pytest.fixture()
def multiindex_data():
    """Build pseudo-random per-date item data for the MultiIndex test.

    Returns
    -------
    dict
        Maps each of 100 consecutive daily timestamps (starting 2013-01-01)
        to a list of ``(item, a, b)`` tuples sorted by ``a``. Each date holds
        between 8 and 20 items; ``a`` and ``b`` are values in ``[0, 100]``
        with two decimal places.
    """
    # Use a private, seeded generator so the data is identical on every run
    # (failures are reproducible) and the global ``random`` state that other
    # tests may rely on is left untouched.
    rng = random.Random(42)

    ndates = 100
    nitems = 20
    dates = pd.date_range("20130101", periods=ndates, freq="D")
    items = [f"item {i}" for i in range(nitems)]

    data = {}
    for date in dates:
        # Drop a random number of trailing items so group sizes vary by date.
        nitems_for_date = nitems - rng.randint(0, 12)
        levels = [
            (item, rng.randint(0, 10000) / 100, rng.randint(0, 10000) / 100)
            for item in items[:nitems_for_date]
        ]
        levels.sort(key=lambda x: x[1])
        data[date] = levels

    return data
 
- def _make_df_from_data(data):
 
-     rows = {}
 
-     for date in data:
 
-         for level in data[date]:
 
-             rows[(date, level[0])] = {"A": level[1], "B": level[2]}
 
-     df = pd.DataFrame.from_dict(rows, orient="index")
 
-     df.index.names = ("Date", "Item")
 
-     return df
 
def test_multiindex(multiindex_data):
    """Trim three rows from both ends of every date group of a MultiIndex frame.

    This is the use-case mentioned in the documentation. The expected frame
    is rebuilt from the raw data with the same ``[3:-3]`` slice applied to
    each date's list.
    """
    full_frame = _make_df_from_data(multiindex_data)
    selected = full_frame.groupby("Date", as_index=False).nth(slice(3, -3))

    trimmed = {date: levels[3:-3] for date, levels in multiindex_data.items()}
    wanted = _make_df_from_data(trimmed)

    tm.assert_frame_equal(selected, wanted)
 
@pytest.mark.parametrize("arg", [1, 5, 30, 1000, -1, -5, -30, -1000])
@pytest.mark.parametrize("method", ["head", "tail"])
@pytest.mark.parametrize("simulated", [True, False])
def test_against_head_and_tail(arg, method, simulated):
    """Compare ``[:arg]`` / ``[-arg:]`` selection with grouped head and tail.

    With ``simulated`` the expected rows are computed by hand from the known
    interleaved layout of the frame; otherwise they come from
    ``GroupBy.head`` / ``GroupBy.tail``.
    """
    n_groups = 100
    n_rows_per_group = 30
    total_rows = n_groups * n_rows_per_group

    # Rows are interleaved: every block of ``n_groups`` consecutive rows
    # holds one row from each group, in group order.
    group_labels = [f"group {g}" for g in range(n_groups)] * n_rows_per_group
    values = [
        f"group {g} row {row}"
        for row in range(n_rows_per_group)
        for g in range(n_groups)
    ]
    df = pd.DataFrame({"group": group_labels, "value": values})
    grouped = df.groupby("group", as_index=False)

    # A negative ``arg`` means "all but the last/first ``-arg`` rows".
    if arg >= 0:
        size = arg
    else:
        size = n_rows_per_group + arg

    take_head = method == "head"
    if take_head:
        result = grouped._positional_selector[:arg]
    else:
        result = grouped._positional_selector[-arg:]

    if not simulated:
        expected = grouped.head(arg) if take_head else grouped.tail(arg)
    else:
        positions = []
        for row in range(size):
            for grp in range(n_groups):
                if take_head:
                    pos = row * n_groups + grp
                    in_bounds = pos < total_rows
                else:
                    pos = (n_rows_per_group + row - size) * n_groups + grp
                    in_bounds = pos >= 0
                if in_bounds:
                    positions.append(pos)
        expected = df.iloc[positions]

    tm.assert_frame_equal(result, expected)
 
@pytest.mark.parametrize("start", [None, 0, 1, 10, -1, -10])
@pytest.mark.parametrize("stop", [None, 0, 1, 10, -1, -10])
@pytest.mark.parametrize("step", [None, 1, 5])
def test_against_df_iloc(start, stop, step):
    """With a single group, slicing must agree with ``DataFrame.iloc``."""
    n_rows = 30
    df = pd.DataFrame(
        {
            "group": ["group 0"] * n_rows,
            "value": list(range(n_rows)),
        }
    )
    key = slice(start, stop, step)

    selected = df.groupby("group", as_index=False)._positional_selector[key]
    wanted = df.iloc[key]

    tm.assert_frame_equal(selected, wanted)
 
def test_series():
    """Positional selection on a Series grouped by its index level."""
    labels = ["a", "a", "a", "b", "b"]
    ser = pd.Series([1, 2, 3, 4, 5], index=labels)

    selected = ser.groupby(level=0)._positional_selector[1:2]

    wanted = pd.Series([2, 5], index=["a", "b"])
    tm.assert_series_equal(selected, wanted)
 
@pytest.mark.parametrize("step", [1, 2, 3, 4, 5])
def test_step(step):
    """Slice every group with ``[::step]`` for several step values."""
    # Group key -> number of rows; groups are laid out contiguously in this
    # order, so "x" occupies rows 0-4, "y" rows 5-8 and "z" rows 9-11.
    group_sizes = {"x": 5, "y": 4, "z": 3}
    columns = ["A", "B"]

    rows = [
        [key, f"{key}{i}"] for key, count in group_sizes.items() for i in range(count)
    ]
    df = pd.DataFrame(rows, columns=columns)
    selected = df.groupby("A", as_index=False)._positional_selector[::step]

    wanted_rows = []
    wanted_index = []
    offset = 0  # row label of the first row of the current group
    for key, count in group_sizes.items():
        for i in range(0, count, step):
            wanted_rows.append([key, f"{key}{i}"])
            wanted_index.append(offset + i)
        offset += count
    wanted = pd.DataFrame(wanted_rows, columns=columns, index=wanted_index)

    tm.assert_frame_equal(selected, wanted)
 
@pytest.fixture()
def column_group_df():
    """Two-row frame with columns ``A``..``G``.

    The second row (0, 0, 1, 0, 1, 0, 2) is the one ``test_column_axis``
    uses as the column grouping key.
    """
    values = [
        [0, 1, 2, 3, 4, 5, 6],
        [0, 0, 1, 0, 1, 0, 2],
    ]
    return pd.DataFrame(values, columns=list("ABCDEFG"))
 
def test_column_axis(column_group_df):
    """Positional selection when grouping along the column axis."""
    # The second row of the frame supplies the group key for each column.
    # NOTE(review): ``axis=1`` in ``groupby`` is deprecated in recent pandas
    # releases - confirm against the pandas version this suite targets.
    keys = column_group_df.iloc[1]
    grouped = column_group_df.groupby(keys, axis=1)

    selected = grouped._positional_selector[1:-1]

    wanted = column_group_df.iloc[:, [1, 3]]
    tm.assert_frame_equal(selected, wanted)
 
def test_columns_on_iter():
    """Iterating a column-selected groupby yields only the selected columns.

    GitHub issue #44821
    """
    frame = pd.DataFrame({name: range(10) for name in "ABC"})

    # Group-by and select columns
    selected = ["A", "B"]
    grouped = frame.groupby(frame.A < 4)[selected]

    for _key, chunk in grouped:
        tm.assert_index_equal(chunk.columns, pd.Index(selected))
        assert "C" not in chunk.columns
 
@pytest.mark.parametrize("func", [list, pd.Index, pd.Series, np.array])
def test_groupby_duplicated_columns(func):
    """Selecting a column twice from a groupby keeps both copies (GH#44924).

    ``func`` wraps the column labels in different list-like containers.
    """
    frame = pd.DataFrame({"A": [1, 2], "B": [3, 3], "C": ["G", "G"]})
    labels = ["A", "B", "A"]

    actual = frame.groupby("C")[func(labels)].mean()

    wanted = pd.DataFrame(
        [[1.5, 3.0, 1.5]],
        columns=["A", "B", "A"],
        index=pd.Index(["G"], name="C"),
    )
    tm.assert_frame_equal(actual, wanted)
 
def test_groupby_get_nonexisting_groups():
    """``get_group`` raises ``KeyError`` for a key combination that has no rows.

    GH#32492
    """
    df = pd.DataFrame(
        data={
            "A": ["a1", "a2", None],
            "B": ["b1", "b2", "b1"],
            "val": [1, 2, 3],
        }
    )
    grps = df.groupby(by=["A", "B"])

    # ``match`` is applied as a regular expression, so the parentheses of the
    # tuple repr must be escaped; unescaped they form a regex group and the
    # literal "(" and ")" in the message would never be checked.
    msg = r"\('a2', 'b1'\)"
    with pytest.raises(KeyError, match=msg):
        grps.get_group(("a2", "b1"))
 
 
  |