123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148 |
- import numpy as np
- import pytest
- from pandas import (
- DataFrame,
- IndexSlice,
- MultiIndex,
- date_range,
- )
- import pandas._testing as tm
@pytest.fixture
def df():
    """
    Single-column frame indexed by (timestamp, letter).

    The first level holds five timestamps, 12 hours apart, from
    2016-01-01 00:00 through 2016-01-03 00:00; the second level cycles
    through "a", "b", "c".  Column ``c1`` numbers the 15 rows in order:

                               c1
    2016-01-01 00:00:00 a   0
                        b   1
                        c   2
    2016-01-01 12:00:00 a   3
                        b   4
                        c   5
    2016-01-02 00:00:00 a   6
                        b   7
                        c   8
    2016-01-02 12:00:00 a   9
                        b  10
                        c  11
    2016-01-03 00:00:00 a  12
                        b  13
                        c  14
    """
    stamps = date_range("2016-01-01", "2016-01-03", freq="12H")
    index = MultiIndex.from_product([stamps, ["a", "b", "c"]])
    return DataFrame({"c1": range(15)}, index=index)
def test_partial_string_matching_single_index(df):
    """
    Partial string matching on a single (datetime) index.

    After swapping the levels and selecting the "a" group, what is left is
    a frame indexed by timestamps only; a date string must then pick out
    the rows of that day.
    """
    # The reference result does not depend on how the swap is spelled:
    # the first two "a" rows of the fixture, with the letter level removed.
    only_a = df.loc[IndexSlice[:, "a"], :]
    expected = only_a.iloc[0:2].droplevel(1)

    # Three equivalent ways of asking for the same level swap.
    swapped_variants = (
        df.swaplevel(),
        df.swaplevel(0),
        df.swaplevel(0, 1),
    )
    for swapped in swapped_variants:
        by_letter = swapped.sort_index()
        result = by_letter.loc["a"].loc["2016-01-01"]
        tm.assert_frame_equal(result, expected)
def test_get_loc_partial_timestamp_multiindex(df):
    """
    ``MultiIndex.get_loc`` with a partial date string in a tuple key.

    Every lookup is expected to come back as a boolean mask over the whole
    index, with True at the matching positions.
    """

    def _mask(size, where):
        # Boolean array of length ``size`` that is True only at ``where``
        # (a list of positions or a boolean indexer).
        out = np.zeros(size, dtype=bool)
        out[where] = True
        return out

    mi = df.index
    n = len(mi)
    day_one_a = ("2016-01-01", "a")

    # day-level strings
    tm.assert_numpy_array_equal(mi.get_loc(day_one_a), _mask(n, [0, 3]))
    tm.assert_numpy_array_equal(
        mi.get_loc(("2016-01-02", "a")), _mask(n, [6, 9])
    )

    # month- and year-level strings: expected to match every "a" entry
    all_a = _mask(n, mi.get_level_values(1).get_loc("a"))
    tm.assert_numpy_array_equal(mi.get_loc(("2016-01", "a")), all_a)
    tm.assert_numpy_array_equal(mi.get_loc(("2016", "a")), all_a)

    # non-monotonic: reverse the entries at even positions, keep the odd ones
    order = np.arange(n, dtype=np.intp)
    order[::2] = order[::-2]
    shuffled = mi.take(order)
    tm.assert_numpy_array_equal(
        shuffled.get_loc(day_one_a), _mask(len(shuffled), [3, 14])
    )
def test_partial_string_timestamp_multiindex(df):
    """
    Partial date-string selection through ``.loc`` on a MultiIndex frame.

    Exercises the datetime level both as the first level (``df``) and as
    the second level (``df_swap``, the level-swapped and sorted frame).
    """
    # GH10331
    df_swap = df.swaplevel(0, 1).sort_index()
    SLC = IndexSlice

    # indexing with IndexSlice
    result = df.loc[SLC["2016-01-01":"2016-02-01", :], :]
    expected = df
    tm.assert_frame_equal(result, expected)

    # match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-01":"2016-01-01"], :]
    expected = df_swap.iloc[[0, 1, 5, 6, 10, 11]]
    tm.assert_frame_equal(result, expected)

    # partial string match on year only
    result = df.loc["2016"]
    expected = df
    tm.assert_frame_equal(result, expected)

    # partial string match on date
    result = df.loc["2016-01-01"]
    expected = df.iloc[0:6]
    tm.assert_frame_equal(result, expected)

    # partial string match on date and hour, from middle
    result = df.loc["2016-01-02 12"]
    # hourly resolution, same as index.levels[0], so we are _not_ slicing on
    #  that level, so that level gets dropped
    expected = df.iloc[9:12].droplevel(0)
    tm.assert_frame_equal(result, expected)

    # partial string match on secondary index
    result = df_swap.loc[SLC[:, "2016-01-02"], :]
    expected = df_swap.iloc[[2, 3, 7, 8, 12, 13]]
    tm.assert_frame_equal(result, expected)

    # tuple selector with partial string match on date
    # "2016-01-01" has daily resolution, so _is_ a slice on the first level.
    # The scalar "a" on the second level means that level is dropped.
    result = df.loc[("2016-01-01", "a"), :]
    expected = df.iloc[[0, 3]].droplevel(1)
    tm.assert_frame_equal(result, expected)

    # Slicing date on first level should break (of course) bc the DTI is the
    # second level on df_swap
    with pytest.raises(KeyError, match="'2016-01-01'"):
        df_swap.loc["2016-01-01"]
def test_partial_string_timestamp_multiindex_str_key_raises(df):
    """
    ``df[<date string>]`` must raise KeyError on a MultiIndex frame.

    The syntax works on a single index, but it is somewhat ambiguous and
    is deliberately not carried over to multi-indexes: it would amount to
    selecting a scalar from a column.
    """
    missing_key = "'2016-01-01'"
    with pytest.raises(KeyError, match=missing_key):
        df["2016-01-01"]
def test_partial_string_timestamp_multiindex_daily_resolution(df):
    """
    A month-level partial string slices a daily-resolution datetime level.

    The ``df`` argument is kept so the signature is unchanged, but it is
    not used: this test slices on "2013-03" and takes rows 118-179, and
    the data built below provides exactly those rows.
    """
    # GH12685 (partial string with daily resolution or below)
    # 100 consecutive days starting 2013-01-01, two rows ("a", "b") per day.
    # January (31) + February (28) = 59 days -> March starts at row 118, and
    # its 31 days occupy rows 118..179.
    days = date_range("2013-01-01", periods=100, freq="D")
    ix = MultiIndex.from_product([days, ["a", "b"]])
    daily = DataFrame({"c1": np.arange(len(ix))}, index=ix)

    result = daily.loc[IndexSlice["2013-03":"2013-03", :], :]
    expected = daily.iloc[118:180]

    # Guard against the comparison degenerating into empty == empty.
    assert len(expected) == 62
    tm.assert_frame_equal(result, expected)
|