1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889 |
- import numpy as np
- import pytest
- from pandas import (
- PeriodIndex,
- Series,
- date_range,
- period_range,
- timedelta_range,
- )
- import pandas._testing as tm
class DropDuplicates:
    """Shared ``drop_duplicates`` / ``duplicated`` checks.

    The class itself defines no ``idx`` fixture; concrete subclasses are
    expected to provide one that yields a 10-element index named ``"idx"``
    (the parametrized masks below are sized for 10 + 5 appended elements).
    """

    def test_drop_duplicates_metadata(self, idx):
        """Check ``freq`` handling of ``drop_duplicates`` with and without dups."""
        # GH#10115
        deduped = idx.drop_duplicates()
        tm.assert_index_equal(idx, deduped)
        assert idx.freq == deduped.freq

        # Appending the index to itself guarantees every value is duplicated.
        doubled = idx.append(idx)
        deduped = doubled.drop_duplicates()

        if isinstance(idx, PeriodIndex):
            # PeriodIndex keeps its freq through append/drop_duplicates.
            expected = idx
            assert deduped.freq == expected.freq
        else:
            # freq is reset except for PeriodIndex
            assert doubled.freq is None
            assert deduped.freq is None
            expected = idx._with_freq(None)

        tm.assert_index_equal(deduped, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.array([False] * 10 + [True] * 5),
                np.arange(10, dtype=np.int64),
            ),
            (
                "last",
                np.array([True] * 5 + [False] * 10),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.array([True] * 5 + [False] * 5 + [True] * 5),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        """Check ``duplicated``/``drop_duplicates`` agree for Index and Series.

        ``expected`` is the boolean duplicate mask for the given ``keep`` and
        ``index`` holds the integer labels the surviving Series rows carry.
        """
        # to check Index/Series compat
        dup_mask = expected
        # Re-append the first five elements so exactly those five are duplicated.
        extended = idx.append(idx[:5])

        tm.assert_numpy_array_equal(extended.duplicated(keep=keep), dup_mask)

        surviving = extended[~dup_mask]
        tm.assert_index_equal(extended.drop_duplicates(keep=keep), surviving)

        series_result = Series(extended).drop_duplicates(keep=keep)
        series_expected = Series(surviving, index=index)
        tm.assert_series_equal(series_result, series_expected)
class TestDropDuplicatesPeriodIndex(DropDuplicates):
    """Run the shared ``DropDuplicates`` checks against ``period_range`` output."""

    # Lowercase "h"/"min"/"s" are used instead of the single-letter aliases
    # "H"/"T"/"S", which are deprecated since pandas 2.2 and emit FutureWarning.
    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        """Yield each period frequency string the tests are run with."""
        return request.param

    @pytest.fixture
    def idx(self, freq):
        """Return a 10-period index named ``"idx"`` at frequency ``freq``."""
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    """Run the shared ``DropDuplicates`` checks against ``date_range`` output."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Return a 10-element datetime index named ``"idx"``.

        ``freq_sample`` is a fixture defined outside this module
        (presumably a conftest-provided frequency string; verify there).
        """
        dti = date_range(
            start="2011-01-01",
            periods=10,
            freq=freq_sample,
            name="idx",
        )
        return dti
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    """Run the shared ``DropDuplicates`` checks against ``timedelta_range`` output."""

    @pytest.fixture
    def idx(self, freq_sample):
        """Return a 10-element timedelta index named ``"idx"``.

        ``freq_sample`` is a fixture defined outside this module
        (presumably a conftest-provided frequency string; verify there).
        """
        tdi = timedelta_range(
            start="1 day",
            periods=10,
            freq=freq_sample,
            name="idx",
        )
        return tdi
|