# test_drop_duplicates.py (2.5 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. PeriodIndex,
  5. Series,
  6. date_range,
  7. period_range,
  8. timedelta_range,
  9. )
  10. import pandas._testing as tm
  11. class DropDuplicates:
  12. def test_drop_duplicates_metadata(self, idx):
  13. # GH#10115
  14. result = idx.drop_duplicates()
  15. tm.assert_index_equal(idx, result)
  16. assert idx.freq == result.freq
  17. idx_dup = idx.append(idx)
  18. result = idx_dup.drop_duplicates()
  19. expected = idx
  20. if not isinstance(idx, PeriodIndex):
  21. # freq is reset except for PeriodIndex
  22. assert idx_dup.freq is None
  23. assert result.freq is None
  24. expected = idx._with_freq(None)
  25. else:
  26. assert result.freq == expected.freq
  27. tm.assert_index_equal(result, expected)
  28. @pytest.mark.parametrize(
  29. "keep, expected, index",
  30. [
  31. (
  32. "first",
  33. np.concatenate(([False] * 10, [True] * 5)),
  34. np.arange(0, 10, dtype=np.int64),
  35. ),
  36. (
  37. "last",
  38. np.concatenate(([True] * 5, [False] * 10)),
  39. np.arange(5, 15, dtype=np.int64),
  40. ),
  41. (
  42. False,
  43. np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
  44. np.arange(5, 10, dtype=np.int64),
  45. ),
  46. ],
  47. )
  48. def test_drop_duplicates(self, keep, expected, index, idx):
  49. # to check Index/Series compat
  50. idx = idx.append(idx[:5])
  51. tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
  52. expected = idx[~expected]
  53. result = idx.drop_duplicates(keep=keep)
  54. tm.assert_index_equal(result, expected)
  55. result = Series(idx).drop_duplicates(keep=keep)
  56. expected = Series(expected, index=index)
  57. tm.assert_series_equal(result, expected)
  58. class TestDropDuplicatesPeriodIndex(DropDuplicates):
  59. @pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
  60. def freq(self, request):
  61. return request.param
  62. @pytest.fixture
  63. def idx(self, freq):
  64. return period_range("2011-01-01", periods=10, freq=freq, name="idx")
  65. class TestDropDuplicatesDatetimeIndex(DropDuplicates):
  66. @pytest.fixture
  67. def idx(self, freq_sample):
  68. return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
  69. class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
  70. @pytest.fixture
  71. def idx(self, freq_sample):
  72. return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")