test_setops.py 29 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888
  1. """
  2. The tests in this package are to ensure the proper resultant dtypes of
  3. set operations.
  4. """
  5. from datetime import datetime
  6. import operator
  7. import numpy as np
  8. import pytest
  9. from pandas.core.dtypes.cast import find_common_type
  10. from pandas import (
  11. CategoricalIndex,
  12. Index,
  13. MultiIndex,
  14. RangeIndex,
  15. Series,
  16. Timestamp,
  17. )
  18. import pandas._testing as tm
  19. from pandas.api.types import (
  20. is_bool_dtype,
  21. is_datetime64tz_dtype,
  22. is_signed_integer_dtype,
  23. pandas_dtype,
  24. )
  25. def test_union_same_types(index):
  26. # Union with a non-unique, non-monotonic index raises error
  27. # Only needed for bool index factory
  28. idx1 = index.sort_values()
  29. idx2 = index.sort_values()
  30. assert idx1.union(idx2).dtype == idx1.dtype
  31. def test_union_different_types(index_flat, index_flat2, request):
  32. # This test only considers combinations of indices
  33. # GH 23525
  34. idx1 = index_flat
  35. idx2 = index_flat2
  36. if (
  37. not idx1.is_unique
  38. and not idx2.is_unique
  39. and idx1.dtype.kind == "i"
  40. and idx2.dtype.kind == "b"
  41. ) or (
  42. not idx2.is_unique
  43. and not idx1.is_unique
  44. and idx2.dtype.kind == "i"
  45. and idx1.dtype.kind == "b"
  46. ):
  47. # Each condition had idx[1|2].is_monotonic_decreasing
  48. # but failed when e.g.
  49. # idx1 = Index(
  50. # [True, True, True, True, True, True, True, True, False, False], dtype='bool'
  51. # )
  52. # idx2 = Index([0, 0, 1, 1, 2, 2], dtype='int64')
  53. mark = pytest.mark.xfail(
  54. reason="GH#44000 True==1", raises=ValueError, strict=False
  55. )
  56. request.node.add_marker(mark)
  57. common_dtype = find_common_type([idx1.dtype, idx2.dtype])
  58. warn = None
  59. if not len(idx1) or not len(idx2):
  60. pass
  61. elif (
  62. idx1.dtype.kind == "c"
  63. and (
  64. idx2.dtype.kind not in ["i", "u", "f", "c"]
  65. or not isinstance(idx2.dtype, np.dtype)
  66. )
  67. ) or (
  68. idx2.dtype.kind == "c"
  69. and (
  70. idx1.dtype.kind not in ["i", "u", "f", "c"]
  71. or not isinstance(idx1.dtype, np.dtype)
  72. )
  73. ):
  74. # complex objects non-sortable
  75. warn = RuntimeWarning
  76. any_uint64 = np.uint64 in (idx1.dtype, idx2.dtype)
  77. idx1_signed = is_signed_integer_dtype(idx1.dtype)
  78. idx2_signed = is_signed_integer_dtype(idx2.dtype)
  79. # Union with a non-unique, non-monotonic index raises error
  80. # This applies to the boolean index
  81. idx1 = idx1.sort_values()
  82. idx2 = idx2.sort_values()
  83. with tm.assert_produces_warning(warn, match="'<' not supported between"):
  84. res1 = idx1.union(idx2)
  85. res2 = idx2.union(idx1)
  86. if any_uint64 and (idx1_signed or idx2_signed):
  87. assert res1.dtype == np.dtype("O")
  88. assert res2.dtype == np.dtype("O")
  89. else:
  90. assert res1.dtype == common_dtype
  91. assert res2.dtype == common_dtype
  92. @pytest.mark.parametrize(
  93. "idx_fact1,idx_fact2",
  94. [
  95. (tm.makeIntIndex, tm.makeRangeIndex),
  96. (tm.makeFloatIndex, tm.makeIntIndex),
  97. (tm.makeFloatIndex, tm.makeRangeIndex),
  98. (tm.makeFloatIndex, tm.makeUIntIndex),
  99. ],
  100. )
  101. def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
  102. # GH 23525
  103. idx1 = idx_fact1(10)
  104. idx2 = idx_fact2(20)
  105. res1 = idx1.union(idx2)
  106. res2 = idx2.union(idx1)
  107. assert res1.dtype in (idx1.dtype, idx2.dtype)
  108. assert res2.dtype in (idx1.dtype, idx2.dtype)
  109. @pytest.mark.parametrize(
  110. "left, right, expected",
  111. [
  112. ("int64", "int64", "int64"),
  113. ("int64", "uint64", "object"),
  114. ("int64", "float64", "float64"),
  115. ("uint64", "float64", "float64"),
  116. ("uint64", "uint64", "uint64"),
  117. ("float64", "float64", "float64"),
  118. ("datetime64[ns]", "int64", "object"),
  119. ("datetime64[ns]", "uint64", "object"),
  120. ("datetime64[ns]", "float64", "object"),
  121. ("datetime64[ns, CET]", "int64", "object"),
  122. ("datetime64[ns, CET]", "uint64", "object"),
  123. ("datetime64[ns, CET]", "float64", "object"),
  124. ("Period[D]", "int64", "object"),
  125. ("Period[D]", "uint64", "object"),
  126. ("Period[D]", "float64", "object"),
  127. ],
  128. )
  129. @pytest.mark.parametrize("names", [("foo", "foo", "foo"), ("foo", "bar", None)])
  130. def test_union_dtypes(left, right, expected, names):
  131. left = pandas_dtype(left)
  132. right = pandas_dtype(right)
  133. a = Index([], dtype=left, name=names[0])
  134. b = Index([], dtype=right, name=names[1])
  135. result = a.union(b)
  136. assert result.dtype == expected
  137. assert result.name == names[2]
  138. # Testing name retention
  139. # TODO: pin down desired dtype; do we want it to be commutative?
  140. result = a.intersection(b)
  141. assert result.name == names[2]
  142. @pytest.mark.parametrize("values", [[1, 2, 2, 3], [3, 3]])
  143. def test_intersection_duplicates(values):
  144. # GH#31326
  145. a = Index(values)
  146. b = Index([3, 3])
  147. result = a.intersection(b)
  148. expected = Index([3])
  149. tm.assert_index_equal(result, expected)
  150. class TestSetOps:
  151. # Set operation tests shared by all indexes in the `index` fixture
  152. @pytest.mark.parametrize("case", [0.5, "xxx"])
  153. @pytest.mark.parametrize(
  154. "method", ["intersection", "union", "difference", "symmetric_difference"]
  155. )
  156. def test_set_ops_error_cases(self, case, method, index):
  157. # non-iterable input
  158. msg = "Input must be Index or array-like"
  159. with pytest.raises(TypeError, match=msg):
  160. getattr(index, method)(case)
  161. def test_intersection_base(self, index):
  162. if isinstance(index, CategoricalIndex):
  163. return
  164. first = index[:5]
  165. second = index[:3]
  166. intersect = first.intersection(second)
  167. assert tm.equalContents(intersect, second)
  168. if is_datetime64tz_dtype(index.dtype):
  169. # The second.values below will drop tz, so the rest of this test
  170. # is not applicable.
  171. return
  172. # GH#10149
  173. cases = [second.to_numpy(), second.to_series(), second.to_list()]
  174. for case in cases:
  175. result = first.intersection(case)
  176. assert tm.equalContents(result, second)
  177. if isinstance(index, MultiIndex):
  178. msg = "other must be a MultiIndex or a list of tuples"
  179. with pytest.raises(TypeError, match=msg):
  180. first.intersection([1, 2, 3])
  181. @pytest.mark.filterwarnings(
  182. "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
  183. )
  184. def test_union_base(self, index):
  185. first = index[3:]
  186. second = index[:5]
  187. everything = index
  188. union = first.union(second)
  189. assert tm.equalContents(union, everything)
  190. if is_datetime64tz_dtype(index.dtype):
  191. # The second.values below will drop tz, so the rest of this test
  192. # is not applicable.
  193. return
  194. # GH#10149
  195. cases = [second.to_numpy(), second.to_series(), second.to_list()]
  196. for case in cases:
  197. result = first.union(case)
  198. assert tm.equalContents(result, everything)
  199. if isinstance(index, MultiIndex):
  200. msg = "other must be a MultiIndex or a list of tuples"
  201. with pytest.raises(TypeError, match=msg):
  202. first.union([1, 2, 3])
  203. @pytest.mark.filterwarnings(
  204. "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
  205. )
  206. def test_difference_base(self, sort, index):
  207. first = index[2:]
  208. second = index[:4]
  209. if is_bool_dtype(index):
  210. # i think (TODO: be sure) there assumptions baked in about
  211. # the index fixture that don't hold here?
  212. answer = set(first).difference(set(second))
  213. elif isinstance(index, CategoricalIndex):
  214. answer = []
  215. else:
  216. answer = index[4:]
  217. result = first.difference(second, sort)
  218. assert tm.equalContents(result, answer)
  219. # GH#10149
  220. cases = [second.to_numpy(), second.to_series(), second.to_list()]
  221. for case in cases:
  222. result = first.difference(case, sort)
  223. assert tm.equalContents(result, answer)
  224. if isinstance(index, MultiIndex):
  225. msg = "other must be a MultiIndex or a list of tuples"
  226. with pytest.raises(TypeError, match=msg):
  227. first.difference([1, 2, 3], sort)
  228. @pytest.mark.filterwarnings(
  229. "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
  230. )
  231. def test_symmetric_difference(self, index):
  232. if isinstance(index, CategoricalIndex):
  233. return
  234. if len(index) < 2:
  235. return
  236. if index[0] in index[1:] or index[-1] in index[:-1]:
  237. # index fixture has e.g. an index of bools that does not satisfy this,
  238. # another with [0, 0, 1, 1, 2, 2]
  239. return
  240. first = index[1:]
  241. second = index[:-1]
  242. answer = index[[0, -1]]
  243. result = first.symmetric_difference(second)
  244. assert tm.equalContents(result, answer)
  245. # GH#10149
  246. cases = [second.to_numpy(), second.to_series(), second.to_list()]
  247. for case in cases:
  248. result = first.symmetric_difference(case)
  249. assert tm.equalContents(result, answer)
  250. if isinstance(index, MultiIndex):
  251. msg = "other must be a MultiIndex or a list of tuples"
  252. with pytest.raises(TypeError, match=msg):
  253. first.symmetric_difference([1, 2, 3])
  254. @pytest.mark.parametrize(
  255. "fname, sname, expected_name",
  256. [
  257. ("A", "A", "A"),
  258. ("A", "B", None),
  259. ("A", None, None),
  260. (None, "B", None),
  261. (None, None, None),
  262. ],
  263. )
  264. def test_corner_union(self, index_flat, fname, sname, expected_name):
  265. # GH#9943, GH#9862
  266. # Test unions with various name combinations
  267. # Do not test MultiIndex or repeats
  268. if not index_flat.is_unique:
  269. pytest.skip("Randomly generated index_flat was not unique.")
  270. index = index_flat
  271. # Test copy.union(copy)
  272. first = index.copy().set_names(fname)
  273. second = index.copy().set_names(sname)
  274. union = first.union(second)
  275. expected = index.copy().set_names(expected_name)
  276. tm.assert_index_equal(union, expected)
  277. # Test copy.union(empty)
  278. first = index.copy().set_names(fname)
  279. second = index.drop(index).set_names(sname)
  280. union = first.union(second)
  281. expected = index.copy().set_names(expected_name)
  282. tm.assert_index_equal(union, expected)
  283. # Test empty.union(copy)
  284. first = index.drop(index).set_names(fname)
  285. second = index.copy().set_names(sname)
  286. union = first.union(second)
  287. expected = index.copy().set_names(expected_name)
  288. tm.assert_index_equal(union, expected)
  289. # Test empty.union(empty)
  290. first = index.drop(index).set_names(fname)
  291. second = index.drop(index).set_names(sname)
  292. union = first.union(second)
  293. expected = index.drop(index).set_names(expected_name)
  294. tm.assert_index_equal(union, expected)
  295. @pytest.mark.parametrize(
  296. "fname, sname, expected_name",
  297. [
  298. ("A", "A", "A"),
  299. ("A", "B", None),
  300. ("A", None, None),
  301. (None, "B", None),
  302. (None, None, None),
  303. ],
  304. )
  305. def test_union_unequal(self, index_flat, fname, sname, expected_name):
  306. if not index_flat.is_unique:
  307. pytest.skip("Randomly generated index_flat was not unique.")
  308. index = index_flat
  309. # test copy.union(subset) - need sort for unicode and string
  310. first = index.copy().set_names(fname)
  311. second = index[1:].set_names(sname)
  312. union = first.union(second).sort_values()
  313. expected = index.set_names(expected_name).sort_values()
  314. tm.assert_index_equal(union, expected)
  315. @pytest.mark.parametrize(
  316. "fname, sname, expected_name",
  317. [
  318. ("A", "A", "A"),
  319. ("A", "B", None),
  320. ("A", None, None),
  321. (None, "B", None),
  322. (None, None, None),
  323. ],
  324. )
  325. def test_corner_intersect(self, index_flat, fname, sname, expected_name):
  326. # GH#35847
  327. # Test intersections with various name combinations
  328. if not index_flat.is_unique:
  329. pytest.skip("Randomly generated index_flat was not unique.")
  330. index = index_flat
  331. # Test copy.intersection(copy)
  332. first = index.copy().set_names(fname)
  333. second = index.copy().set_names(sname)
  334. intersect = first.intersection(second)
  335. expected = index.copy().set_names(expected_name)
  336. tm.assert_index_equal(intersect, expected)
  337. # Test copy.intersection(empty)
  338. first = index.copy().set_names(fname)
  339. second = index.drop(index).set_names(sname)
  340. intersect = first.intersection(second)
  341. expected = index.drop(index).set_names(expected_name)
  342. tm.assert_index_equal(intersect, expected)
  343. # Test empty.intersection(copy)
  344. first = index.drop(index).set_names(fname)
  345. second = index.copy().set_names(sname)
  346. intersect = first.intersection(second)
  347. expected = index.drop(index).set_names(expected_name)
  348. tm.assert_index_equal(intersect, expected)
  349. # Test empty.intersection(empty)
  350. first = index.drop(index).set_names(fname)
  351. second = index.drop(index).set_names(sname)
  352. intersect = first.intersection(second)
  353. expected = index.drop(index).set_names(expected_name)
  354. tm.assert_index_equal(intersect, expected)
  355. @pytest.mark.parametrize(
  356. "fname, sname, expected_name",
  357. [
  358. ("A", "A", "A"),
  359. ("A", "B", None),
  360. ("A", None, None),
  361. (None, "B", None),
  362. (None, None, None),
  363. ],
  364. )
  365. def test_intersect_unequal(self, index_flat, fname, sname, expected_name):
  366. if not index_flat.is_unique:
  367. pytest.skip("Randomly generated index_flat was not unique.")
  368. index = index_flat
  369. # test copy.intersection(subset) - need sort for unicode and string
  370. first = index.copy().set_names(fname)
  371. second = index[1:].set_names(sname)
  372. intersect = first.intersection(second).sort_values()
  373. expected = index[1:].set_names(expected_name).sort_values()
  374. tm.assert_index_equal(intersect, expected)
  375. def test_intersection_name_retention_with_nameless(self, index):
  376. if isinstance(index, MultiIndex):
  377. index = index.rename(list(range(index.nlevels)))
  378. else:
  379. index = index.rename("foo")
  380. other = np.asarray(index)
  381. result = index.intersection(other)
  382. assert result.name == index.name
  383. # empty other, same dtype
  384. result = index.intersection(other[:0])
  385. assert result.name == index.name
  386. # empty `self`
  387. result = index[:0].intersection(other)
  388. assert result.name == index.name
  389. def test_difference_preserves_type_empty(self, index, sort):
  390. # GH#20040
  391. # If taking difference of a set and itself, it
  392. # needs to preserve the type of the index
  393. if not index.is_unique:
  394. return
  395. result = index.difference(index, sort=sort)
  396. expected = index[:0]
  397. tm.assert_index_equal(result, expected, exact=True)
  398. def test_difference_name_retention_equals(self, index, names):
  399. if isinstance(index, MultiIndex):
  400. names = [[x] * index.nlevels for x in names]
  401. index = index.rename(names[0])
  402. other = index.rename(names[1])
  403. assert index.equals(other)
  404. result = index.difference(other)
  405. expected = index[:0].rename(names[2])
  406. tm.assert_index_equal(result, expected)
  407. def test_intersection_difference_match_empty(self, index, sort):
  408. # GH#20040
  409. # Test that the intersection of an index with an
  410. # empty index produces the same index as the difference
  411. # of an index with itself. Test for all types
  412. if not index.is_unique:
  413. return
  414. inter = index.intersection(index[:0])
  415. diff = index.difference(index, sort=sort)
  416. tm.assert_index_equal(inter, diff, exact=True)
  417. @pytest.mark.filterwarnings(
  418. "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
  419. )
  420. @pytest.mark.parametrize(
  421. "method", ["intersection", "union", "difference", "symmetric_difference"]
  422. )
  423. def test_setop_with_categorical(index_flat, sort, method):
  424. # MultiIndex tested separately in tests.indexes.multi.test_setops
  425. index = index_flat
  426. other = index.astype("category")
  427. exact = "equiv" if isinstance(index, RangeIndex) else True
  428. result = getattr(index, method)(other, sort=sort)
  429. expected = getattr(index, method)(index, sort=sort)
  430. tm.assert_index_equal(result, expected, exact=exact)
  431. result = getattr(index, method)(other[:5], sort=sort)
  432. expected = getattr(index, method)(index[:5], sort=sort)
  433. tm.assert_index_equal(result, expected, exact=exact)
  434. def test_intersection_duplicates_all_indexes(index):
  435. # GH#38743
  436. if index.empty:
  437. # No duplicates in empty indexes
  438. return
  439. idx = index
  440. idx_non_unique = idx[[0, 0, 1, 2]]
  441. assert idx.intersection(idx_non_unique).equals(idx_non_unique.intersection(idx))
  442. assert idx.intersection(idx_non_unique).is_unique
  443. def test_union_duplicate_index_subsets_of_each_other(
  444. any_dtype_for_small_pos_integer_indexes,
  445. ):
  446. # GH#31326
  447. dtype = any_dtype_for_small_pos_integer_indexes
  448. a = Index([1, 2, 2, 3], dtype=dtype)
  449. b = Index([3, 3, 4], dtype=dtype)
  450. expected = Index([1, 2, 2, 3, 3, 4], dtype=dtype)
  451. if isinstance(a, CategoricalIndex):
  452. expected = Index([1, 2, 2, 3, 3, 4])
  453. result = a.union(b)
  454. tm.assert_index_equal(result, expected)
  455. result = a.union(b, sort=False)
  456. tm.assert_index_equal(result, expected)
  457. def test_union_with_duplicate_index_and_non_monotonic(
  458. any_dtype_for_small_pos_integer_indexes,
  459. ):
  460. # GH#36289
  461. dtype = any_dtype_for_small_pos_integer_indexes
  462. a = Index([1, 0, 0], dtype=dtype)
  463. b = Index([0, 1], dtype=dtype)
  464. expected = Index([0, 0, 1], dtype=dtype)
  465. result = a.union(b)
  466. tm.assert_index_equal(result, expected)
  467. result = b.union(a)
  468. tm.assert_index_equal(result, expected)
  469. def test_union_duplicate_index_different_dtypes():
  470. # GH#36289
  471. a = Index([1, 2, 2, 3])
  472. b = Index(["1", "0", "0"])
  473. expected = Index([1, 2, 2, 3, "1", "0", "0"])
  474. result = a.union(b, sort=False)
  475. tm.assert_index_equal(result, expected)
  476. def test_union_same_value_duplicated_in_both():
  477. # GH#36289
  478. a = Index([0, 0, 1])
  479. b = Index([0, 0, 1, 2])
  480. result = a.union(b)
  481. expected = Index([0, 0, 1, 2])
  482. tm.assert_index_equal(result, expected)
  483. @pytest.mark.parametrize("dup", [1, np.nan])
  484. def test_union_nan_in_both(dup):
  485. # GH#36289
  486. a = Index([np.nan, 1, 2, 2])
  487. b = Index([np.nan, dup, 1, 2])
  488. result = a.union(b, sort=False)
  489. expected = Index([np.nan, dup, 1.0, 2.0, 2.0])
  490. tm.assert_index_equal(result, expected)
  491. def test_union_rangeindex_sort_true():
  492. # GH 53490
  493. idx1 = RangeIndex(1, 100, 6)
  494. idx2 = RangeIndex(1, 50, 3)
  495. result = idx1.union(idx2, sort=True)
  496. expected = Index(
  497. [
  498. 1,
  499. 4,
  500. 7,
  501. 10,
  502. 13,
  503. 16,
  504. 19,
  505. 22,
  506. 25,
  507. 28,
  508. 31,
  509. 34,
  510. 37,
  511. 40,
  512. 43,
  513. 46,
  514. 49,
  515. 55,
  516. 61,
  517. 67,
  518. 73,
  519. 79,
  520. 85,
  521. 91,
  522. 97,
  523. ]
  524. )
  525. tm.assert_index_equal(result, expected)
  526. def test_union_with_duplicate_index_not_subset_and_non_monotonic(
  527. any_dtype_for_small_pos_integer_indexes,
  528. ):
  529. # GH#36289
  530. dtype = any_dtype_for_small_pos_integer_indexes
  531. a = Index([1, 0, 2], dtype=dtype)
  532. b = Index([0, 0, 1], dtype=dtype)
  533. expected = Index([0, 0, 1, 2], dtype=dtype)
  534. if isinstance(a, CategoricalIndex):
  535. expected = Index([0, 0, 1, 2])
  536. result = a.union(b)
  537. tm.assert_index_equal(result, expected)
  538. result = b.union(a)
  539. tm.assert_index_equal(result, expected)
  540. def test_union_int_categorical_with_nan():
  541. ci = CategoricalIndex([1, 2, np.nan])
  542. assert ci.categories.dtype.kind == "i"
  543. idx = Index([1, 2])
  544. result = idx.union(ci)
  545. expected = Index([1, 2, np.nan], dtype=np.float64)
  546. tm.assert_index_equal(result, expected)
  547. result = ci.union(idx)
  548. tm.assert_index_equal(result, expected)
  549. class TestSetOpsUnsorted:
  550. # These may eventually belong in a dtype-specific test_setops, or
  551. # parametrized over a more general fixture
  552. def test_intersect_str_dates(self):
  553. dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]
  554. index1 = Index(dt_dates, dtype=object)
  555. index2 = Index(["aa"], dtype=object)
  556. result = index2.intersection(index1)
  557. expected = Index([], dtype=object)
  558. tm.assert_index_equal(result, expected)
  559. @pytest.mark.parametrize("index", ["string"], indirect=True)
  560. def test_intersection(self, index, sort):
  561. first = index[:20]
  562. second = index[:10]
  563. intersect = first.intersection(second, sort=sort)
  564. if sort is None:
  565. tm.assert_index_equal(intersect, second.sort_values())
  566. assert tm.equalContents(intersect, second)
  567. # Corner cases
  568. inter = first.intersection(first, sort=sort)
  569. assert inter is first
  570. @pytest.mark.parametrize(
  571. "index2,keeps_name",
  572. [
  573. (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name
  574. (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names
  575. (Index([3, 4, 5, 6, 7]), False),
  576. ],
  577. )
  578. def test_intersection_name_preservation(self, index2, keeps_name, sort):
  579. index1 = Index([1, 2, 3, 4, 5], name="index")
  580. expected = Index([3, 4, 5])
  581. result = index1.intersection(index2, sort)
  582. if keeps_name:
  583. expected.name = "index"
  584. assert result.name == expected.name
  585. tm.assert_index_equal(result, expected)
  586. @pytest.mark.parametrize("index", ["string"], indirect=True)
  587. @pytest.mark.parametrize(
  588. "first_name,second_name,expected_name",
  589. [("A", "A", "A"), ("A", "B", None), (None, "B", None)],
  590. )
  591. def test_intersection_name_preservation2(
  592. self, index, first_name, second_name, expected_name, sort
  593. ):
  594. first = index[5:20]
  595. second = index[:10]
  596. first.name = first_name
  597. second.name = second_name
  598. intersect = first.intersection(second, sort=sort)
  599. assert intersect.name == expected_name
  600. def test_chained_union(self, sort):
  601. # Chained unions handles names correctly
  602. i1 = Index([1, 2], name="i1")
  603. i2 = Index([5, 6], name="i2")
  604. i3 = Index([3, 4], name="i3")
  605. union = i1.union(i2.union(i3, sort=sort), sort=sort)
  606. expected = i1.union(i2, sort=sort).union(i3, sort=sort)
  607. tm.assert_index_equal(union, expected)
  608. j1 = Index([1, 2], name="j1")
  609. j2 = Index([], name="j2")
  610. j3 = Index([], name="j3")
  611. union = j1.union(j2.union(j3, sort=sort), sort=sort)
  612. expected = j1.union(j2, sort=sort).union(j3, sort=sort)
  613. tm.assert_index_equal(union, expected)
  614. @pytest.mark.parametrize("index", ["string"], indirect=True)
  615. def test_union(self, index, sort):
  616. first = index[5:20]
  617. second = index[:10]
  618. everything = index[:20]
  619. union = first.union(second, sort=sort)
  620. if sort is None:
  621. tm.assert_index_equal(union, everything.sort_values())
  622. assert tm.equalContents(union, everything)
  623. @pytest.mark.parametrize("klass", [np.array, Series, list])
  624. @pytest.mark.parametrize("index", ["string"], indirect=True)
  625. def test_union_from_iterables(self, index, klass, sort):
  626. # GH#10149
  627. first = index[5:20]
  628. second = index[:10]
  629. everything = index[:20]
  630. case = klass(second.values)
  631. result = first.union(case, sort=sort)
  632. if sort is None:
  633. tm.assert_index_equal(result, everything.sort_values())
  634. assert tm.equalContents(result, everything)
  635. @pytest.mark.parametrize("index", ["string"], indirect=True)
  636. def test_union_identity(self, index, sort):
  637. first = index[5:20]
  638. union = first.union(first, sort=sort)
  639. # i.e. identity is not preserved when sort is True
  640. assert (union is first) is (not sort)
  641. # This should no longer be the same object, since [] is not consistent,
  642. # both objects will be recast to dtype('O')
  643. union = first.union([], sort=sort)
  644. assert (union is first) is (not sort)
  645. union = Index([]).union(first, sort=sort)
  646. assert (union is first) is (not sort)
  647. @pytest.mark.parametrize("index", ["string"], indirect=True)
  648. @pytest.mark.parametrize("second_name,expected", [(None, None), ("name", "name")])
  649. def test_difference_name_preservation(self, index, second_name, expected, sort):
  650. first = index[5:20]
  651. second = index[:10]
  652. answer = index[10:20]
  653. first.name = "name"
  654. second.name = second_name
  655. result = first.difference(second, sort=sort)
  656. assert tm.equalContents(result, answer)
  657. if expected is None:
  658. assert result.name is None
  659. else:
  660. assert result.name == expected
  661. def test_difference_empty_arg(self, index, sort):
  662. first = index[5:20]
  663. first.name = "name"
  664. result = first.difference([], sort)
  665. tm.assert_index_equal(result, first)
  666. @pytest.mark.parametrize("index", ["string"], indirect=True)
  667. def test_difference_identity(self, index, sort):
  668. first = index[5:20]
  669. first.name = "name"
  670. result = first.difference(first, sort)
  671. assert len(result) == 0
  672. assert result.name == first.name
  673. @pytest.mark.parametrize("index", ["string"], indirect=True)
  674. def test_difference_sort(self, index, sort):
  675. first = index[5:20]
  676. second = index[:10]
  677. result = first.difference(second, sort)
  678. expected = index[10:20]
  679. if sort is None:
  680. expected = expected.sort_values()
  681. tm.assert_index_equal(result, expected)
  682. @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
  683. def test_difference_incomparable(self, opname):
  684. a = Index([3, Timestamp("2000"), 1])
  685. b = Index([2, Timestamp("1999"), 1])
  686. op = operator.methodcaller(opname, b)
  687. with tm.assert_produces_warning(RuntimeWarning):
  688. # sort=None, the default
  689. result = op(a)
  690. expected = Index([3, Timestamp("2000"), 2, Timestamp("1999")])
  691. if opname == "difference":
  692. expected = expected[:2]
  693. tm.assert_index_equal(result, expected)
  694. # sort=False
  695. op = operator.methodcaller(opname, b, sort=False)
  696. result = op(a)
  697. tm.assert_index_equal(result, expected)
  698. @pytest.mark.parametrize("opname", ["difference", "symmetric_difference"])
  699. def test_difference_incomparable_true(self, opname):
  700. a = Index([3, Timestamp("2000"), 1])
  701. b = Index([2, Timestamp("1999"), 1])
  702. op = operator.methodcaller(opname, b, sort=True)
  703. msg = "'<' not supported between instances of 'Timestamp' and 'int'"
  704. with pytest.raises(TypeError, match=msg):
  705. op(a)
  706. def test_symmetric_difference_mi(self, sort):
  707. index1 = MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3]))
  708. index2 = MultiIndex.from_tuples([("foo", 1), ("bar", 3)])
  709. result = index1.symmetric_difference(index2, sort=sort)
  710. expected = MultiIndex.from_tuples([("bar", 2), ("baz", 3), ("bar", 3)])
  711. if sort is None:
  712. expected = expected.sort_values()
  713. tm.assert_index_equal(result, expected)
  714. assert tm.equalContents(result, expected)
  715. @pytest.mark.parametrize(
  716. "index2,expected",
  717. [
  718. (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])),
  719. (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0])),
  720. ],
  721. )
  722. def test_symmetric_difference_missing(self, index2, expected, sort):
  723. # GH#13514 change: {nan} - {nan} == {}
  724. # (GH#6444, sorting of nans, is no longer an issue)
  725. index1 = Index([1, np.nan, 2, 3])
  726. result = index1.symmetric_difference(index2, sort=sort)
  727. if sort is None:
  728. expected = expected.sort_values()
  729. tm.assert_index_equal(result, expected)
  730. def test_symmetric_difference_non_index(self, sort):
  731. index1 = Index([1, 2, 3, 4], name="index1")
  732. index2 = np.array([2, 3, 4, 5])
  733. expected = Index([1, 5])
  734. result = index1.symmetric_difference(index2, sort=sort)
  735. assert tm.equalContents(result, expected)
  736. assert result.name == "index1"
  737. result = index1.symmetric_difference(index2, result_name="new_name", sort=sort)
  738. assert tm.equalContents(result, expected)
  739. assert result.name == "new_name"
  740. def test_union_ea_dtypes(self, any_numeric_ea_and_arrow_dtype):
  741. # GH#51365
  742. idx = Index([1, 2, 3], dtype=any_numeric_ea_and_arrow_dtype)
  743. idx2 = Index([3, 4, 5], dtype=any_numeric_ea_and_arrow_dtype)
  744. result = idx.union(idx2)
  745. expected = Index([1, 2, 3, 4, 5], dtype=any_numeric_ea_and_arrow_dtype)
  746. tm.assert_index_equal(result, expected)