12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888 |
- """
- The tests in this package are to ensure the proper resultant dtypes of
- set operations.
- """
- from datetime import datetime
- import operator
- import numpy as np
- import pytest
- from pandas.core.dtypes.cast import find_common_type
- from pandas import (
- CategoricalIndex,
- Index,
- MultiIndex,
- RangeIndex,
- Series,
- Timestamp,
- )
- import pandas._testing as tm
- from pandas.api.types import (
- is_bool_dtype,
- is_datetime64tz_dtype,
- is_signed_integer_dtype,
- pandas_dtype,
- )
def test_union_same_types(index):
    """Check that the union of an index with itself keeps the original dtype."""
    # Union with a non-unique, non-monotonic index raises error
    # Only needed for bool index factory
    idx1 = index.sort_values()
    idx2 = index.sort_values()
    assert idx1.union(idx2).dtype == idx1.dtype
def test_union_different_types(index_flat, index_flat2, request):
    """Check the result dtype of ``union`` for every pair of flat indexes.

    The union is taken in both directions. The expected dtype is object when
    one side is uint64 and the other a signed integer dtype, and otherwise
    whatever ``find_common_type`` reports for the two input dtypes.
    """
    # This test only considers combinations of indices
    # GH 23525
    idx1 = index_flat
    idx2 = index_flat2

    if (
        not idx1.is_unique
        and not idx2.is_unique
        and {idx1.dtype.kind, idx2.dtype.kind} == {"i", "b"}
    ):
        # Each condition had idx[1|2].is_monotonic_decreasing
        # but failed when e.g.
        # idx1 = Index(
        # [True, True, True, True, True, True, True, True, False, False], dtype='bool'
        # )
        # idx2 = Index([0, 0, 1, 1, 2, 2], dtype='int64')
        mark = pytest.mark.xfail(
            reason="GH#44000 True==1", raises=ValueError, strict=False
        )
        request.node.add_marker(mark)

    common_dtype = find_common_type([idx1.dtype, idx2.dtype])

    def _complex_with_non_numeric(cplx, other):
        # True when ``cplx`` is complex and ``other`` is anything except a
        # plain numpy integer/unsigned/float/complex dtype.
        return cplx.dtype.kind == "c" and (
            other.dtype.kind not in ["i", "u", "f", "c"]
            or not isinstance(other.dtype, np.dtype)
        )

    warn = None
    if (
        len(idx1)
        and len(idx2)
        and (
            _complex_with_non_numeric(idx1, idx2)
            or _complex_with_non_numeric(idx2, idx1)
        )
    ):
        # complex objects non-sortable
        warn = RuntimeWarning

    # Record the integer flavours before the indexes are rebound below.
    any_uint64 = np.uint64 in (idx1.dtype, idx2.dtype)
    idx1_signed = is_signed_integer_dtype(idx1.dtype)
    idx2_signed = is_signed_integer_dtype(idx2.dtype)

    # Union with a non-unique, non-monotonic index raises error
    # This applies to the boolean index
    idx1 = idx1.sort_values()
    idx2 = idx2.sort_values()

    with tm.assert_produces_warning(warn, match="'<' not supported between"):
        res1 = idx1.union(idx2)
        res2 = idx2.union(idx1)

    if any_uint64 and (idx1_signed or idx2_signed):
        assert res1.dtype == np.dtype("O")
        assert res2.dtype == np.dtype("O")
    else:
        assert res1.dtype == common_dtype
        assert res2.dtype == common_dtype
@pytest.mark.parametrize(
    "idx_fact1,idx_fact2",
    [
        (tm.makeIntIndex, tm.makeRangeIndex),
        (tm.makeFloatIndex, tm.makeIntIndex),
        (tm.makeFloatIndex, tm.makeRangeIndex),
        (tm.makeFloatIndex, tm.makeUIntIndex),
    ],
)
def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
    """Check that the union dtype of each factory pair is one of the input dtypes.

    ``idx_fact1`` and ``idx_fact2`` are index factories called with a length
    (10 and 20 respectively); the union is checked in both directions.
    """
    # GH 23525
    idx1 = idx_fact1(10)
    idx2 = idx_fact2(20)

    res1 = idx1.union(idx2)
    res2 = idx2.union(idx1)

    assert res1.dtype in (idx1.dtype, idx2.dtype)
    assert res2.dtype in (idx1.dtype, idx2.dtype)
@pytest.mark.parametrize(
    "left, right, expected",
    [
        ("int64", "int64", "int64"),
        ("int64", "uint64", "object"),
        ("int64", "float64", "float64"),
        ("uint64", "float64", "float64"),
        ("uint64", "uint64", "uint64"),
        ("float64", "float64", "float64"),
        ("datetime64[ns]", "int64", "object"),
        ("datetime64[ns]", "uint64", "object"),
        ("datetime64[ns]", "float64", "object"),
        ("datetime64[ns, CET]", "int64", "object"),
        ("datetime64[ns, CET]", "uint64", "object"),
        ("datetime64[ns, CET]", "float64", "object"),
        ("Period[D]", "int64", "object"),
        ("Period[D]", "uint64", "object"),
        ("Period[D]", "float64", "object"),
    ],
)
@pytest.mark.parametrize("names", [("foo", "foo", "foo"), ("foo", "bar", None)])
def test_union_dtypes(left, right, expected, names):
    """Check dtype and name of the union (and name of the intersection) of empties.

    ``left`` / ``right`` are dtype strings for two empty indexes, ``expected``
    is the dtype the union must have, and ``names`` is a
    ``(left name, right name, expected result name)`` triple.
    """
    left = pandas_dtype(left)
    right = pandas_dtype(right)
    a = Index([], dtype=left, name=names[0])
    b = Index([], dtype=right, name=names[1])

    result = a.union(b)
    assert result.dtype == expected
    assert result.name == names[2]

    # Testing name retention
    # TODO: pin down desired dtype; do we want it to be commutative?
    result = a.intersection(b)
    assert result.name == names[2]
@pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]])
def test_intersection_duplicates(values):
    """Check that intersecting with ``[3, 3]`` yields a single ``3``."""
    # GH#31326
    a = Index(values)
    b = Index([3, 3])

    result = a.intersection(b)

    expected = Index([3])
    tm.assert_index_equal(result, expected)
class TestSetOps:
    """Set operation tests shared by all indexes in the `index` fixture."""

    # (first name, second name, expected result name) combinations shared by
    # the name-propagation tests in this class.
    _NAME_CASES = [
        ("A", "A", "A"),
        ("A", "B", None),
        ("A", None, None),
        (None, "B", None),
        (None, None, None),
    ]

    @pytest.mark.parametrize("case", [0.5, "xxx"])
    @pytest.mark.parametrize(
        "method", ["intersection", "union", "difference", "symmetric_difference"]
    )
    def test_set_ops_error_cases(self, case, method, index):
        """Each set operation must raise TypeError for a scalar argument."""
        # non-iterable input
        msg = "Input must be Index or array-like"
        with pytest.raises(TypeError, match=msg):
            getattr(index, method)(case)

    def test_intersection_base(self, index):
        """Intersect ``index[:5]`` with ``index[:3]`` given in several containers."""
        if isinstance(index, CategoricalIndex):
            # Report the case as skipped instead of silently passing.
            pytest.skip(f"Not relevant for {type(index).__name__}")

        first = index[:5]
        second = index[:3]
        intersect = first.intersection(second)
        assert tm.equalContents(intersect, second)

        if is_datetime64tz_dtype(index.dtype):
            # The second.values below will drop tz, so the rest of this test
            # is not applicable.
            return

        # GH#10149
        cases = [second.to_numpy(), second.to_series(), second.to_list()]
        for case in cases:
            result = first.intersection(case)
            assert tm.equalContents(result, second)

        if isinstance(index, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.intersection([1, 2, 3])

    @pytest.mark.filterwarnings(
        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
    )
    def test_union_base(self, index):
        """Union ``index[3:]`` with ``index[:5]`` given in several containers."""
        first = index[3:]
        second = index[:5]
        everything = index

        union = first.union(second)
        assert tm.equalContents(union, everything)

        if is_datetime64tz_dtype(index.dtype):
            # The second.values below will drop tz, so the rest of this test
            # is not applicable.
            return

        # GH#10149
        cases = [second.to_numpy(), second.to_series(), second.to_list()]
        for case in cases:
            result = first.union(case)
            assert tm.equalContents(result, everything)

        if isinstance(index, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.union([1, 2, 3])

    @pytest.mark.filterwarnings(
        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
    )
    def test_difference_base(self, sort, index):
        """Subtract ``index[:4]`` from ``index[2:]`` given in several containers."""
        first = index[2:]
        second = index[:4]
        if is_bool_dtype(index):
            # i think (TODO: be sure) there are assumptions baked in about
            # the index fixture that don't hold here?
            answer = set(first).difference(set(second))
        elif isinstance(index, CategoricalIndex):
            answer = []
        else:
            answer = index[4:]
        result = first.difference(second, sort)
        assert tm.equalContents(result, answer)

        # GH#10149
        cases = [second.to_numpy(), second.to_series(), second.to_list()]
        for case in cases:
            result = first.difference(case, sort)
            assert tm.equalContents(result, answer)

        if isinstance(index, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.difference([1, 2, 3], sort)

    @pytest.mark.filterwarnings(
        "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
    )
    def test_symmetric_difference(self, index):
        """Symmetric difference of ``index[1:]`` and ``index[:-1]`` is the two ends."""
        if isinstance(index, CategoricalIndex):
            pytest.skip(f"Not relevant for {type(index).__name__}")
        if len(index) < 2:
            pytest.skip("Too few values for test")
        if index[0] in index[1:] or index[-1] in index[:-1]:
            # index fixture has e.g. an index of bools that does not satisfy this,
            # another with [0, 0, 1, 1, 2, 2]
            pytest.skip("Index values do not satisfy test condition.")

        first = index[1:]
        second = index[:-1]
        answer = index[[0, -1]]
        result = first.symmetric_difference(second)
        assert tm.equalContents(result, answer)

        # GH#10149
        cases = [second.to_numpy(), second.to_series(), second.to_list()]
        for case in cases:
            result = first.symmetric_difference(case)
            assert tm.equalContents(result, answer)

        if isinstance(index, MultiIndex):
            msg = "other must be a MultiIndex or a list of tuples"
            with pytest.raises(TypeError, match=msg):
                first.symmetric_difference([1, 2, 3])

    @pytest.mark.parametrize("fname, sname, expected_name", _NAME_CASES)
    def test_corner_union(self, index_flat, fname, sname, expected_name):
        """Union of full/empty copies of an index under each name combination."""
        # GH#9943, GH#9862
        # Test unions with various name combinations
        # Do not test MultiIndex or repeats
        if not index_flat.is_unique:
            pytest.skip("Randomly generated index_flat was not unique.")
        index = index_flat

        # Test copy.union(copy)
        first = index.copy().set_names(fname)
        second = index.copy().set_names(sname)
        union = first.union(second)
        expected = index.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test copy.union(empty)
        first = index.copy().set_names(fname)
        second = index.drop(index).set_names(sname)
        union = first.union(second)
        expected = index.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test empty.union(copy)
        first = index.drop(index).set_names(fname)
        second = index.copy().set_names(sname)
        union = first.union(second)
        expected = index.copy().set_names(expected_name)
        tm.assert_index_equal(union, expected)

        # Test empty.union(empty)
        first = index.drop(index).set_names(fname)
        second = index.drop(index).set_names(sname)
        union = first.union(second)
        expected = index.drop(index).set_names(expected_name)
        tm.assert_index_equal(union, expected)

    @pytest.mark.parametrize("fname, sname, expected_name", _NAME_CASES)
    def test_union_unequal(self, index_flat, fname, sname, expected_name):
        """Union of an index with ``index[1:]`` under each name combination."""
        if not index_flat.is_unique:
            pytest.skip("Randomly generated index_flat was not unique.")
        index = index_flat

        # test copy.union(subset) - need sort for unicode and string
        first = index.copy().set_names(fname)
        second = index[1:].set_names(sname)
        union = first.union(second).sort_values()
        expected = index.set_names(expected_name).sort_values()
        tm.assert_index_equal(union, expected)

    @pytest.mark.parametrize("fname, sname, expected_name", _NAME_CASES)
    def test_corner_intersect(self, index_flat, fname, sname, expected_name):
        """Intersection of full/empty copies of an index under each name combination."""
        # GH#35847
        # Test intersections with various name combinations
        if not index_flat.is_unique:
            pytest.skip("Randomly generated index_flat was not unique.")
        index = index_flat

        # Test copy.intersection(copy)
        first = index.copy().set_names(fname)
        second = index.copy().set_names(sname)
        intersect = first.intersection(second)
        expected = index.copy().set_names(expected_name)
        tm.assert_index_equal(intersect, expected)

        # Test copy.intersection(empty)
        first = index.copy().set_names(fname)
        second = index.drop(index).set_names(sname)
        intersect = first.intersection(second)
        expected = index.drop(index).set_names(expected_name)
        tm.assert_index_equal(intersect, expected)

        # Test empty.intersection(copy)
        first = index.drop(index).set_names(fname)
        second = index.copy().set_names(sname)
        intersect = first.intersection(second)
        expected = index.drop(index).set_names(expected_name)
        tm.assert_index_equal(intersect, expected)

        # Test empty.intersection(empty)
        first = index.drop(index).set_names(fname)
        second = index.drop(index).set_names(sname)
        intersect = first.intersection(second)
        expected = index.drop(index).set_names(expected_name)
        tm.assert_index_equal(intersect, expected)

    @pytest.mark.parametrize("fname, sname, expected_name", _NAME_CASES)
    def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
        """Intersection of an index with ``index[1:]`` under each name combination."""
        if not index_flat.is_unique:
            pytest.skip("Randomly generated index_flat was not unique.")
        index = index_flat

        # test copy.intersection(subset) - need sort for unicode and string
        first = index.copy().set_names(fname)
        second = index[1:].set_names(sname)
        intersect = first.intersection(second).sort_values()
        expected = index[1:].set_names(expected_name).sort_values()
        tm.assert_index_equal(intersect, expected)

    def test_intersection_name_retention_with_nameless(self, index):
        """Intersecting with a plain ndarray must keep the index's own name."""
        if isinstance(index, MultiIndex):
            index = index.rename(list(range(index.nlevels)))
        else:
            index = index.rename("foo")

        other = np.asarray(index)

        result = index.intersection(other)
        assert result.name == index.name

        # empty other, same dtype
        result = index.intersection(other[:0])
        assert result.name == index.name

        # empty `self`
        result = index[:0].intersection(other)
        assert result.name == index.name

    def test_difference_preserves_type_empty(self, index, sort):
        """``index.difference(index)`` must equal ``index[:0]`` exactly."""
        # GH#20040
        # If taking difference of a set and itself, it
        # needs to preserve the type of the index
        if not index.is_unique:
            pytest.skip("Not relevant since index is not unique")
        result = index.difference(index, sort=sort)
        expected = index[:0]
        tm.assert_index_equal(result, expected, exact=True)

    def test_difference_name_retention_equals(self, index, names):
        """Name of the (empty) difference of two equal, differently-named indexes."""
        if isinstance(index, MultiIndex):
            # One name per level is needed for a MultiIndex.
            names = [[x] * index.nlevels for x in names]
        index = index.rename(names[0])
        other = index.rename(names[1])

        assert index.equals(other)

        result = index.difference(other)
        expected = index[:0].rename(names[2])
        tm.assert_index_equal(result, expected)

    def test_intersection_difference_match_empty(self, index, sort):
        """Intersection with an empty slice must match the self-difference."""
        # GH#20040
        # Test that the intersection of an index with an
        # empty index produces the same index as the difference
        # of an index with itself. Test for all types
        if not index.is_unique:
            pytest.skip("Not relevant because index is not unique")
        inter = index.intersection(index[:0])
        diff = index.difference(index, sort=sort)
        tm.assert_index_equal(inter, diff, exact=True)
@pytest.mark.filterwarnings(
    "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize(
    "method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(index_flat, sort, method):
    """Check a set op against a categorical cast gives the same result as against the index.

    The comparison is made once with the whole categorical and once with its
    first five elements.
    """
    # MultiIndex tested separately in tests.indexes.multi.test_setops
    index = index_flat

    other = index.astype("category")
    # RangeIndex results are only compared up to "equiv".
    exact = "equiv" if isinstance(index, RangeIndex) else True

    result = getattr(index, method)(other, sort=sort)
    expected = getattr(index, method)(index, sort=sort)
    tm.assert_index_equal(result, expected, exact=exact)

    result = getattr(index, method)(other[:5], sort=sort)
    expected = getattr(index, method)(index[:5], sort=sort)
    tm.assert_index_equal(result, expected, exact=exact)
def test_intersection_duplicates_all_indexes(index):
    """Check intersection with a duplicated selection is symmetric and unique."""
    # GH#38743
    if index.empty:
        # No duplicates in empty indexes
        pytest.skip("Not relevant for empty Index")
    idx = index
    idx_non_unique = idx[[0, 0, 1, 2]]

    # Computed once and reused for both assertions.
    result = idx.intersection(idx_non_unique)
    assert result.equals(idx_non_unique.intersection(idx))
    assert result.is_unique
def test_union_duplicate_index_subsets_of_each_other(
    any_dtype_for_small_pos_integer_indexes,
):
    """Check union of ``[1, 2, 2, 3]`` and ``[3, 3, 4]`` with default sort and sort=False."""
    # GH#31326
    dtype = any_dtype_for_small_pos_integer_indexes
    a = Index([1, 2, 2, 3], dtype=dtype)
    b = Index([3, 3, 4], dtype=dtype)

    expected = Index([1, 2, 2, 3, 3, 4], dtype=dtype)
    if isinstance(a, CategoricalIndex):
        # For categorical inputs the expected result is built without the dtype.
        expected = Index([1, 2, 2, 3, 3, 4])

    result = a.union(b)
    tm.assert_index_equal(result, expected)

    result = a.union(b, sort=False)
    tm.assert_index_equal(result, expected)
def test_union_with_duplicate_index_and_non_monotonic(
    any_dtype_for_small_pos_integer_indexes,
):
    """Check union of ``[1, 0, 0]`` and ``[0, 1]`` is ``[0, 0, 1]`` in both directions."""
    # GH#36289
    dtype = any_dtype_for_small_pos_integer_indexes
    a = Index([1, 0, 0], dtype=dtype)
    b = Index([0, 1], dtype=dtype)
    expected = Index([0, 0, 1], dtype=dtype)

    result = a.union(b)
    tm.assert_index_equal(result, expected)

    result = b.union(a)
    tm.assert_index_equal(result, expected)
def test_union_duplicate_index_different_dtypes():
    """Check unsorted union of an integer and a string index concatenates the values."""
    # GH#36289
    a = Index([1, 2, 2, 3])
    b = Index(["1", "0", "0"])
    expected = Index([1, 2, 2, 3, "1", "0", "0"])

    result = a.union(b, sort=False)
    tm.assert_index_equal(result, expected)
def test_union_same_value_duplicated_in_both():
    """Check a value duplicated in both operands is not duplicated further."""
    # GH#36289
    a = Index([0, 0, 1])
    b = Index([0, 0, 1, 2])

    result = a.union(b)

    expected = Index([0, 0, 1, 2])
    tm.assert_index_equal(result, expected)
@pytest.mark.parametrize("dup", [1, np.nan])
def test_union_nan_in_both(dup):
    """Check unsorted union when both operands contain NaN plus a repeated value.

    ``dup`` is the element placed second in the right-hand index: either a
    repeat of ``1`` or a second NaN.
    """
    # GH#36289
    a = Index([np.nan, 1, 2, 2])
    b = Index([np.nan, dup, 1, 2])

    result = a.union(b, sort=False)

    expected = Index([np.nan, dup, 1.0, 2.0, 2.0])
    tm.assert_index_equal(result, expected)
- def test_union_rangeindex_sort_true():
- # GH 53490
- idx1 = RangeIndex(1, 100, 6)
- idx2 = RangeIndex(1, 50, 3)
- result = idx1.union(idx2, sort=True)
- expected = Index(
- [
- 1,
- 4,
- 7,
- 10,
- 13,
- 16,
- 19,
- 22,
- 25,
- 28,
- 31,
- 34,
- 37,
- 40,
- 43,
- 46,
- 49,
- 55,
- 61,
- 67,
- 73,
- 79,
- 85,
- 91,
- 97,
- ]
- )
- tm.assert_index_equal(result, expected)
def test_union_with_duplicate_index_not_subset_and_non_monotonic(
    any_dtype_for_small_pos_integer_indexes,
):
    """Check union of ``[1, 0, 2]`` and ``[0, 0, 1]`` is ``[0, 0, 1, 2]`` in both directions."""
    # GH#36289
    dtype = any_dtype_for_small_pos_integer_indexes
    a = Index([1, 0, 2], dtype=dtype)
    b = Index([0, 0, 1], dtype=dtype)

    expected = Index([0, 0, 1, 2], dtype=dtype)
    if isinstance(a, CategoricalIndex):
        # For categorical inputs the expected result is built without the dtype.
        expected = Index([0, 0, 1, 2])

    result = a.union(b)
    tm.assert_index_equal(result, expected)

    result = b.union(a)
    tm.assert_index_equal(result, expected)
def test_union_int_categorical_with_nan():
    """Check union of an int index with an int-categorical holding NaN is float64."""
    ci = CategoricalIndex([1, 2, np.nan])
    # Precondition: the NaN is not one of the categories, which stay integer.
    assert ci.categories.dtype.kind == "i"

    idx = Index([1, 2])

    result = idx.union(ci)
    expected = Index([1, 2, np.nan], dtype=np.float64)
    tm.assert_index_equal(result, expected)

    result = ci.union(idx)
    tm.assert_index_equal(result, expected)
class TestSetOpsUnsorted:
    """Set operation tests written against specific index types."""

    # These may eventually belong in a dtype-specific test_setops, or
    # parametrized over a more general fixture
    def test_intersect_str_dates(self):
        """Object indexes of datetimes and of strings have an empty intersection."""
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        index1 = Index(dt_dates, dtype=object)
        index2 = Index(["aa"], dtype=object)
        result = index2.intersection(index1)

        expected = Index([], dtype=object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_intersection(self, index, sort):
        """Intersect ``index[:20]`` with ``index[:10]`` and with itself."""
        first = index[:20]
        second = index[:10]
        intersect = first.intersection(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(intersect, second.sort_values())
        assert tm.equalContents(intersect, second)

        # Corner cases
        inter = first.intersection(first, sort=sort)
        assert inter is first

    @pytest.mark.parametrize(
        "index2,keeps_name",
        [
            (Index([3, 4, 5, 6, 7], name="index"), True),  # preserve same name
            (Index([3, 4, 5, 6, 7], name="other"), False),  # drop diff names
            (Index([3, 4, 5, 6, 7]), False),
        ],
    )
    def test_intersection_name_preservation(self, index2, keeps_name, sort):
        """The result keeps its name only when both operands share that name."""
        index1 = Index([1, 2, 3, 4, 5], name="index")
        expected = Index([3, 4, 5])
        result = index1.intersection(index2, sort)

        if keeps_name:
            expected.name = "index"

        assert result.name == expected.name
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    @pytest.mark.parametrize(
        "first_name,second_name,expected_name",
        [("A", "A", "A"), ("A", "B", None), (None, "B", None)],
    )
    def test_intersection_name_preservation2(
        self, index, first_name, second_name, expected_name, sort
    ):
        """Name of the intersection of two overlapping slices of a string index."""
        first = index[5:20]
        second = index[:10]
        first.name = first_name
        second.name = second_name
        intersect = first.intersection(second, sort=sort)
        assert intersect.name == expected_name

    def test_chained_union(self, sort):
        """Nested and left-to-right chained unions must give equal results."""
        # Chained unions handles names correctly
        i1 = Index([1, 2], name="i1")
        i2 = Index([5, 6], name="i2")
        i3 = Index([3, 4], name="i3")
        union = i1.union(i2.union(i3, sort=sort), sort=sort)
        expected = i1.union(i2, sort=sort).union(i3, sort=sort)
        tm.assert_index_equal(union, expected)

        # Same check with empty second and third operands.
        j1 = Index([1, 2], name="j1")
        j2 = Index([], name="j2")
        j3 = Index([], name="j3")
        union = j1.union(j2.union(j3, sort=sort), sort=sort)
        expected = j1.union(j2, sort=sort).union(j3, sort=sort)
        tm.assert_index_equal(union, expected)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_union(self, index, sort):
        """Union of ``index[5:20]`` and ``index[:10]`` covers ``index[:20]``."""
        first = index[5:20]
        second = index[:10]
        everything = index[:20]

        union = first.union(second, sort=sort)
        if sort is None:
            tm.assert_index_equal(union, everything.sort_values())
        assert tm.equalContents(union, everything)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_union_from_iterables(self, index, klass, sort):
        """Union accepts the other operand wrapped as ndarray, Series or list."""
        # GH#10149
        first = index[5:20]
        second = index[:10]
        everything = index[:20]

        case = klass(second.values)
        result = first.union(case, sort=sort)
        if sort is None:
            tm.assert_index_equal(result, everything.sort_values())
        assert tm.equalContents(result, everything)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_union_identity(self, index, sort):
        """Check when ``union`` hands back the very same object."""
        first = index[5:20]

        union = first.union(first, sort=sort)
        # i.e. identity is not preserved when sort is True
        assert (union is first) is (not sort)

        # This should no longer be the same object, since [] is not consistent,
        # both objects will be recast to dtype('O')
        union = first.union([], sort=sort)
        assert (union is first) is (not sort)

        union = Index([]).union(first, sort=sort)
        assert (union is first) is (not sort)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")])
    def test_difference_name_preservation(self, index, second_name, expected, sort):
        """Name of ``index[5:20].difference(index[:10])`` for each second name."""
        first = index[5:20]
        second = index[:10]
        answer = index[10:20]

        first.name = "name"
        second.name = second_name
        result = first.difference(second, sort=sort)

        assert tm.equalContents(result, answer)

        if expected is None:
            assert result.name is None
        else:
            assert result.name == expected

    # NOTE(review): unlike the neighbouring tests this one is not pinned to the
    # "string" index via indirect parametrization -- confirm that is intended.
    def test_difference_empty_arg(self, index, sort):
        """Difference with an empty list must equal the (named) original slice."""
        first = index[5:20]
        first.name = "name"
        result = first.difference([], sort)

        tm.assert_index_equal(result, first)

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_difference_identity(self, index, sort):
        """Difference of a slice with itself is empty and keeps the name."""
        first = index[5:20]
        first.name = "name"
        result = first.difference(first, sort)

        assert len(result) == 0
        assert result.name == first.name

    @pytest.mark.parametrize("index", ["string"], indirect=True)
    def test_difference_sort(self, index, sort):
        """Difference result is sorted only when ``sort`` is None."""
        first = index[5:20]
        second = index[:10]

        result = first.difference(second, sort)
        expected = index[10:20]

        if sort is None:
            expected = expected.sort_values()

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
    def test_difference_incomparable(self, opname):
        """Mixed int/Timestamp operands: default sort warns, sort=False does not."""
        a = Index([3, Timestamp("2000"), 1])
        b = Index([2, Timestamp("1999"), 1])
        op = operator.methodcaller(opname, b)

        with tm.assert_produces_warning(RuntimeWarning):
            # sort=None, the default
            result = op(a)
        expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")])
        if opname == "difference":
            # Only the elements coming from `a` remain.
            expected = expected[:2]
        tm.assert_index_equal(result, expected)

        # sort=False
        op = operator.methodcaller(opname, b, sort=False)
        result = op(a)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
    def test_difference_incomparable_true(self, opname):
        """Mixed int/Timestamp operands with sort=True must raise TypeError."""
        a = Index([3, Timestamp("2000"), 1])
        b = Index([2, Timestamp("1999"), 1])
        op = operator.methodcaller(opname, b, sort=True)

        msg = "'<' not supported between instances of 'Timestamp' and 'int'"
        with pytest.raises(TypeError, match=msg):
            op(a)

    def test_symmetric_difference_mi(self, sort):
        """Symmetric difference of two small MultiIndexes."""
        index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3]))
        index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)])
        result = index1.symmetric_difference(index2, sort=sort)
        expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)])
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)
        assert tm.equalContents(result, expected)

    @pytest.mark.parametrize(
        "index2,expected",
        [
            (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])),
            (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])),
        ],
    )
    def test_symmetric_difference_missing(self, index2, expected, sort):
        """Symmetric difference when one or both operands contain NaN."""
        # GH#13514 change: {nan} - {nan} == {}
        # (GH#6444, sorting of nans, is no longer an issue)
        index1 = Index([1, np.nan, 2, 3])

        result = index1.symmetric_difference(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference_non_index(self, sort):
        """Symmetric difference with an ndarray, with and without ``result_name``."""
        index1 = Index([1, 2, 3, 4], name="index1")
        index2 = np.array([2, 3, 4, 5])
        expected = Index([1, 5])
        result = index1.symmetric_difference(index2, sort=sort)
        assert tm.equalContents(result, expected)
        assert result.name == "index1"

        result = index1.symmetric_difference(index2, result_name="new_name", sort=sort)
        assert tm.equalContents(result, expected)
        assert result.name == "new_name"

    def test_union_ea_dtypes(self, any_numeric_ea_and_arrow_dtype):
        """Union of two overlapping indexes sharing the fixture-supplied dtype."""
        # GH#51365
        idx = Index([1, 2, 3], dtype=any_numeric_ea_and_arrow_dtype)
        idx2 = Index([3, 4, 5], dtype=any_numeric_ea_and_arrow_dtype)
        result = idx.union(idx2)
        expected = Index([1, 2, 3, 4, 5], dtype=any_numeric_ea_and_arrow_dtype)
        tm.assert_index_equal(result, expected)
|