# test_duplicated.py (2.0 KB)
  1. import numpy as np
  2. import pytest
  3. from pandas import (
  4. NA,
  5. Categorical,
  6. Series,
  7. )
  8. import pandas._testing as tm
  9. @pytest.mark.parametrize(
  10. "keep, expected",
  11. [
  12. ("first", Series([False, False, True, False, True], name="name")),
  13. ("last", Series([True, True, False, False, False], name="name")),
  14. (False, Series([True, True, True, False, True], name="name")),
  15. ],
  16. )
  17. def test_duplicated_keep(keep, expected):
  18. ser = Series(["a", "b", "b", "c", "a"], name="name")
  19. result = ser.duplicated(keep=keep)
  20. tm.assert_series_equal(result, expected)
  21. @pytest.mark.parametrize(
  22. "keep, expected",
  23. [
  24. ("first", Series([False, False, True, False, True])),
  25. ("last", Series([True, True, False, False, False])),
  26. (False, Series([True, True, True, False, True])),
  27. ],
  28. )
  29. def test_duplicated_nan_none(keep, expected):
  30. ser = Series([np.nan, 3, 3, None, np.nan], dtype=object)
  31. result = ser.duplicated(keep=keep)
  32. tm.assert_series_equal(result, expected)
  33. def test_duplicated_categorical_bool_na(nulls_fixture):
  34. # GH#44351
  35. ser = Series(
  36. Categorical(
  37. [True, False, True, False, nulls_fixture],
  38. categories=[True, False],
  39. ordered=True,
  40. )
  41. )
  42. result = ser.duplicated()
  43. expected = Series([False, False, True, True, False])
  44. tm.assert_series_equal(result, expected)
  45. @pytest.mark.parametrize(
  46. "keep, vals",
  47. [
  48. ("last", [True, True, False]),
  49. ("first", [False, True, True]),
  50. (False, [True, True, True]),
  51. ],
  52. )
  53. def test_duplicated_mask(keep, vals):
  54. # GH#48150
  55. ser = Series([1, 2, NA, NA, NA], dtype="Int64")
  56. result = ser.duplicated(keep=keep)
  57. expected = Series([False, False] + vals)
  58. tm.assert_series_equal(result, expected)
  59. def test_duplicated_mask_no_duplicated_na(keep):
  60. # GH#48150
  61. ser = Series([1, 2, NA], dtype="Int64")
  62. result = ser.duplicated(keep=keep)
  63. expected = Series([False, False, False])
  64. tm.assert_series_equal(result, expected)