123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256 |
- import numpy as np
- import pytest
- import pandas.util._test_decorators as td
- from pandas import (
- DataFrame,
- MultiIndex,
- date_range,
- to_datetime,
- )
- import pandas._testing as tm
class TestMultiIndexPartial:
    """Tests for partial (leading-level) indexing on MultiIndex-ed pandas objects."""

    def test_getitem_partial_int(self):
        """``df[key]`` with an integer first-level column key selects partially."""
        # GH 12416
        # with single item
        l1 = [10, 20]
        l2 = ["a", "b"]
        df = DataFrame(index=range(2), columns=MultiIndex.from_product([l1, l2]))
        expected = DataFrame(index=range(2), columns=l2)
        result = df[20]
        tm.assert_frame_equal(result, expected)

        # with list
        expected = DataFrame(
            index=range(2), columns=MultiIndex.from_product([l1[1:], l2])
        )
        result = df[[20]]
        tm.assert_frame_equal(result, expected)

        # missing item:
        with pytest.raises(KeyError, match="1"):
            df[1]
        with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
            df[[1]]

    def test_series_slice_partial(self):
        """Placeholder: this test currently has no body and asserts nothing."""
        pass

    def test_xs_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """``xs`` with a partial key must agree with the equivalent ``.loc`` lookup."""
        frame = multiindex_dataframe_random_data
        ymd = multiindex_year_month_day_dataframe_random_data
        result = frame.xs("foo")
        result2 = frame.loc["foo"]
        expected = frame.T["foo"].T
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result, result2)

        result = ymd.xs((2000, 4))
        expected = ymd.loc[2000, 4]
        tm.assert_frame_equal(result, expected)

        # ex from #1796
        index = MultiIndex(
            levels=[["foo", "bar"], ["one", "two"], [-1, 1]],
            codes=[
                [0, 0, 0, 0, 1, 1, 1, 1],
                [0, 0, 1, 1, 0, 0, 1, 1],
                [0, 1, 0, 1, 0, 1, 0, 1],
            ],
        )
        df = DataFrame(np.random.randn(8, 4), index=index, columns=list("abcd"))

        result = df.xs(("foo", "one"))
        expected = df.loc["foo", "one"]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial(self, multiindex_year_month_day_dataframe_random_data):
        """``frame[a, b]`` on MultiIndex columns selects on the two leading levels."""
        ymd = multiindex_year_month_day_dataframe_random_data
        # Transpose so that the MultiIndex is on the columns.
        ymd = ymd.T
        result = ymd[2000, 2]

        # Build the expectation from the columns whose second-level code is 1,
        # then drop the two leading levels.
        expected = ymd.reindex(columns=ymd.columns[ymd.columns.codes[1] == 1])
        expected.columns = expected.columns.droplevel(0).droplevel(0)
        tm.assert_frame_equal(result, expected)

    def test_fancy_slice_partial(
        self,
        multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data,
    ):
        """Label slices given as partial keys are compared with positional/mask picks."""
        frame = multiindex_dataframe_random_data
        result = frame.loc["bar":"baz"]
        expected = frame[3:7]
        tm.assert_frame_equal(result, expected)

        ymd = multiindex_year_month_day_dataframe_random_data
        result = ymd.loc[(2000, 2):(2000, 4)]
        # Mask on the second-level codes 1 through 3.
        lev = ymd.index.codes[1]
        expected = ymd[(lev >= 1) & (lev <= 3)]
        tm.assert_frame_equal(result, expected)

    def test_getitem_partial_column_select(self):
        """Partial row key combined with a column selector in ``.loc``."""
        idx = MultiIndex(
            codes=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
            levels=[["a", "b"], ["x", "y"], ["p", "q"]],
        )
        df = DataFrame(np.random.rand(3, 2), index=idx)

        result = df.loc[("a", "y"), :]
        expected = df.loc[("a", "y")]
        tm.assert_frame_equal(result, expected)

        result = df.loc[("a", "y"), [1, 0]]
        expected = df.loc[("a", "y")][[1, 0]]
        tm.assert_frame_equal(result, expected)

        # A partial key that is not present must raise.
        with pytest.raises(KeyError, match=r"\('a', 'foo'\)"):
            df.loc[("a", "foo"), :]

    # TODO(ArrayManager) rewrite test to not use .values
    #  exp.loc[2000, 4].values[:] select multiple columns -> .values is not a view
    @td.skip_array_manager_invalid_test
    def test_partial_set(
        self, multiindex_year_month_day_dataframe_random_data, using_copy_on_write
    ):
        """Setting through partial keys is checked against positional ``iloc`` sets.

        ``using_copy_on_write`` selects which branch is exercised: with it set,
        the chained assignments are wrapped in
        ``tm.raises_chained_assignment_error()``.
        """
        # GH #397
        ymd = multiindex_year_month_day_dataframe_random_data
        df = ymd.copy()
        exp = ymd.copy()
        df.loc[2000, 4] = 0
        exp.iloc[65:85] = 0
        tm.assert_frame_equal(df, exp)

        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"].loc[2000, 4] = 1
            # Perform the same update through a single ``.loc`` call.
            df.loc[(2000, 4), "A"] = 1
        else:
            df["A"].loc[2000, 4] = 1
        exp.iloc[65:85, 0] = 1
        tm.assert_frame_equal(df, exp)

        df.loc[2000] = 5
        exp.iloc[:100] = 5
        tm.assert_frame_equal(df, exp)

        # this works...for now
        if using_copy_on_write:
            with tm.raises_chained_assignment_error():
                df["A"].iloc[14] = 5
            # This comparison must be asserted; as a bare expression its result
            # would be discarded and the branch would verify nothing.
            assert df["A"].iloc[14] == exp["A"].iloc[14]
        else:
            df["A"].iloc[14] = 5
            assert df["A"].iloc[14] == 5

    @pytest.mark.parametrize("dtype", [int, float])
    def test_getitem_intkey_leading_level(
        self, multiindex_year_month_day_dataframe_random_data, dtype
    ):
        """An absent integer key raises ``KeyError`` when the leading level is numeric."""
        # GH#33355 dont fall-back to positional when leading level is int
        ymd = multiindex_year_month_day_dataframe_random_data
        levels = ymd.index.levels
        # Recast only the first level to the parametrized dtype.
        ymd.index = ymd.index.set_levels([levels[0].astype(dtype)] + levels[1:])
        ser = ymd["A"]
        mi = ser.index
        assert isinstance(mi, MultiIndex)
        if dtype is int:
            assert mi.levels[0].dtype == np.int_
        else:
            assert mi.levels[0].dtype == np.float64

        assert 14 not in mi.levels[0]
        assert not mi.levels[0]._should_fallback_to_positional
        assert not mi._should_fallback_to_positional

        with pytest.raises(KeyError, match="14"):
            ser[14]

    # ---------------------------------------------------------------------

    def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
        """Setting via a list or slice of first-level labels equals per-label sets."""
        frame = multiindex_dataframe_random_data

        # DataFrame, list of labels
        expected = frame.copy()
        result = frame.copy()
        result.loc[["foo", "bar"]] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_frame_equal(result, expected)

        # DataFrame, label slice
        expected = frame.copy()
        result = frame.copy()
        result.loc["foo":"bar"] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_frame_equal(result, expected)

        # Series, list of labels
        expected = frame["A"].copy()
        result = frame["A"].copy()
        result.loc[["foo", "bar"]] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_series_equal(result, expected)

        # Series, label slice
        expected = frame["A"].copy()
        result = frame["A"].copy()
        result.loc["foo":"bar"] = 0
        expected.loc["foo"] = 0
        expected.loc["bar"] = 0
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "indexer, exp_idx, exp_values",
        [
            (slice("2019-2", None), [to_datetime("2019-02-01")], [2, 3]),
            (
                slice(None, "2019-2"),
                date_range("2019", periods=2, freq="MS"),
                [0, 1, 2, 3],
            ),
        ],
    )
    def test_partial_getitem_loc_datetime(self, indexer, exp_idx, exp_values):
        """Partial date-string slices on a datetime level via ``[]`` and ``.loc``."""
        # GH: 25165
        date_idx = date_range("2019", periods=2, freq="MS")
        df = DataFrame(
            list(range(4)),
            index=MultiIndex.from_product([date_idx, [0, 1]], names=["x", "y"]),
        )
        expected = DataFrame(
            exp_values,
            index=MultiIndex.from_product([exp_idx, [0, 1]], names=["x", "y"]),
        )

        # Every spelling of the same row selection must give the same frame.
        result = df[indexer]
        tm.assert_frame_equal(result, expected)
        result = df.loc[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc(axis=0)[indexer]
        tm.assert_frame_equal(result, expected)

        result = df.loc[indexer, :]
        tm.assert_frame_equal(result, expected)

        # Same selection with the datetime level moved to the second position.
        df2 = df.swaplevel(0, 1).sort_index()
        expected = expected.swaplevel(0, 1).sort_index()

        result = df2.loc[:, indexer, :]
        tm.assert_frame_equal(result, expected)
def test_loc_getitem_partial_both_axis():
    """``.loc["a", "b"]`` takes a partial key on the rows and on the columns."""
    # gh-12660
    level_values = [["a", "b"], [2, 1]]
    row_index = MultiIndex.from_product(level_values, names=["row1", "row2"])
    col_index = MultiIndex.from_product(level_values, names=["col1", "col2"])
    frame = DataFrame(np.random.randn(4, 4), index=row_index, columns=col_index)

    actual = frame.loc["a", "b"]

    # Positional equivalent: first two rows, last two columns, with the
    # selected outer level removed from each axis.
    corner = frame.iloc[:2, 2:]
    corner = corner.droplevel("row1")
    wanted = corner.droplevel("col1", axis=1)

    tm.assert_frame_equal(actual, wanted)
|