123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259 |
- from datetime import datetime
- import numpy as np
- import pytest
- import pandas as pd
- from pandas import (
- Index,
- Series,
- )
- import pandas._testing as tm
- from pandas.core.algorithms import safe_sort
- class TestIndexSetOps:
- @pytest.mark.parametrize(
- "method", ["union", "intersection", "difference", "symmetric_difference"]
- )
- def test_setops_sort_validation(self, method):
- idx1 = Index(["a", "b"])
- idx2 = Index(["b", "c"])
- with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
- getattr(idx1, method)(idx2, sort=2)
- # sort=True is supported as of GH#??
- getattr(idx1, method)(idx2, sort=True)
- def test_setops_preserve_object_dtype(self):
- idx = Index([1, 2, 3], dtype=object)
- result = idx.intersection(idx[1:])
- expected = idx[1:]
- tm.assert_index_equal(result, expected)
- # if other is not monotonic increasing, intersection goes through
- # a different route
- result = idx.intersection(idx[1:][::-1])
- tm.assert_index_equal(result, expected)
- result = idx._union(idx[1:], sort=None)
- expected = idx
- tm.assert_numpy_array_equal(result, expected.values)
- result = idx.union(idx[1:], sort=None)
- tm.assert_index_equal(result, expected)
- # if other is not monotonic increasing, _union goes through
- # a different route
- result = idx._union(idx[1:][::-1], sort=None)
- tm.assert_numpy_array_equal(result, expected.values)
- result = idx.union(idx[1:][::-1], sort=None)
- tm.assert_index_equal(result, expected)
- def test_union_base(self):
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[3:]
- second = index[:5]
- result = first.union(second)
- expected = Index([0, 1, 2, "a", "b", "c"])
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("klass", [np.array, Series, list])
- def test_union_different_type_base(self, klass):
- # GH 10149
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[3:]
- second = index[:5]
- result = first.union(klass(second.values))
- assert tm.equalContents(result, index)
- def test_union_sort_other_incomparable(self):
- # https://github.com/pandas-dev/pandas/issues/24959
- idx = Index([1, pd.Timestamp("2000")])
- # default (sort=None)
- with tm.assert_produces_warning(RuntimeWarning):
- result = idx.union(idx[:1])
- tm.assert_index_equal(result, idx)
- # sort=None
- with tm.assert_produces_warning(RuntimeWarning):
- result = idx.union(idx[:1], sort=None)
- tm.assert_index_equal(result, idx)
- # sort=False
- result = idx.union(idx[:1], sort=False)
- tm.assert_index_equal(result, idx)
- def test_union_sort_other_incomparable_true(self):
- idx = Index([1, pd.Timestamp("2000")])
- with pytest.raises(TypeError, match=".*"):
- idx.union(idx[:1], sort=True)
- def test_intersection_equal_sort_true(self):
- idx = Index(["c", "a", "b"])
- sorted_ = Index(["a", "b", "c"])
- tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)
- def test_intersection_base(self, sort):
- # (same results for py2 and py3 but sortedness not tested elsewhere)
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[:5]
- second = index[:3]
- expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
- result = first.intersection(second, sort=sort)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("klass", [np.array, Series, list])
- def test_intersection_different_type_base(self, klass, sort):
- # GH 10149
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[:5]
- second = index[:3]
- result = first.intersection(klass(second.values), sort=sort)
- assert tm.equalContents(result, second)
- def test_intersection_nosort(self):
- result = Index(["c", "b", "a"]).intersection(["b", "a"])
- expected = Index(["b", "a"])
- tm.assert_index_equal(result, expected)
- def test_intersection_equal_sort(self):
- idx = Index(["c", "a", "b"])
- tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
- tm.assert_index_equal(idx.intersection(idx, sort=None), idx)
- def test_intersection_str_dates(self, sort):
- dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
- i1 = Index(dt_dates, dtype=object)
- i2 = Index(["aa"], dtype=object)
- result = i2.intersection(i1, sort=sort)
- assert len(result) == 0
- @pytest.mark.parametrize(
- "index2,expected_arr",
- [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
- )
- def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
- # non-monotonic non-unique
- index1 = Index(["A", "B", "A", "C"])
- expected = Index(expected_arr, dtype="object")
- result = index1.intersection(index2, sort=sort)
- if sort is None:
- expected = expected.sort_values()
- tm.assert_index_equal(result, expected)
- def test_difference_base(self, sort):
- # (same results for py2 and py3 but sortedness not tested elsewhere)
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[:4]
- second = index[3:]
- result = first.difference(second, sort)
- expected = Index([0, "a", 1])
- if sort is None:
- expected = Index(safe_sort(expected))
- tm.assert_index_equal(result, expected)
- def test_symmetric_difference(self):
- # (same results for py2 and py3 but sortedness not tested elsewhere)
- index = Index([0, "a", 1, "b", 2, "c"])
- first = index[:4]
- second = index[3:]
- result = first.symmetric_difference(second)
- expected = Index([0, 1, 2, "a", "c"])
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize(
- "method,expected,sort",
- [
- (
- "intersection",
- np.array(
- [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
- dtype=[("num", int), ("let", "a1")],
- ),
- False,
- ),
- (
- "intersection",
- np.array(
- [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
- dtype=[("num", int), ("let", "a1")],
- ),
- None,
- ),
- (
- "union",
- np.array(
- [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
- dtype=[("num", int), ("let", "a1")],
- ),
- None,
- ),
- ],
- )
- def test_tuple_union_bug(self, method, expected, sort):
- index1 = Index(
- np.array(
- [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
- dtype=[("num", int), ("let", "a1")],
- )
- )
- index2 = Index(
- np.array(
- [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
- dtype=[("num", int), ("let", "a1")],
- )
- )
- result = getattr(index1, method)(index2, sort=sort)
- assert result.ndim == 1
- expected = Index(expected)
- tm.assert_index_equal(result, expected)
- @pytest.mark.parametrize("first_list", [["b", "a"], []])
- @pytest.mark.parametrize("second_list", [["a", "b"], []])
- @pytest.mark.parametrize(
- "first_name, second_name, expected_name",
- [("A", "B", None), (None, "B", None), ("A", None, None)],
- )
- def test_union_name_preservation(
- self, first_list, second_list, first_name, second_name, expected_name, sort
- ):
- first = Index(first_list, name=first_name)
- second = Index(second_list, name=second_name)
- union = first.union(second, sort=sort)
- vals = set(first_list).union(second_list)
- if sort is None and len(first_list) > 0 and len(second_list) > 0:
- expected = Index(sorted(vals), name=expected_name)
- tm.assert_index_equal(union, expected)
- else:
- expected = Index(vals, name=expected_name)
- tm.equalContents(union, expected)
- @pytest.mark.parametrize(
- "diff_type, expected",
- [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
- )
- def test_difference_object_type(self, diff_type, expected):
- # GH 13432
- idx1 = Index([0, 1, "A", "B"])
- idx2 = Index([0, 2, "A", "C"])
- result = getattr(idx1, diff_type)(idx2)
- expected = Index(expected)
- tm.assert_index_equal(result, expected)
|