# test_arraysetops.py
"""Test functions for 1D array set operations.

"""
import numpy as np

from numpy.testing import (assert_array_equal, assert_equal,
                           assert_raises, assert_raises_regex)
from numpy.lib.arraysetops import (
    ediff1d, intersect1d, setxor1d, union1d, setdiff1d, unique, in1d, isin
    )
import pytest
  10. class TestSetOps:
  11. def test_intersect1d(self):
  12. # unique inputs
  13. a = np.array([5, 7, 1, 2])
  14. b = np.array([2, 4, 3, 1, 5])
  15. ec = np.array([1, 2, 5])
  16. c = intersect1d(a, b, assume_unique=True)
  17. assert_array_equal(c, ec)
  18. # non-unique inputs
  19. a = np.array([5, 5, 7, 1, 2])
  20. b = np.array([2, 1, 4, 3, 3, 1, 5])
  21. ed = np.array([1, 2, 5])
  22. c = intersect1d(a, b)
  23. assert_array_equal(c, ed)
  24. assert_array_equal([], intersect1d([], []))
  25. def test_intersect1d_array_like(self):
  26. # See gh-11772
  27. class Test:
  28. def __array__(self):
  29. return np.arange(3)
  30. a = Test()
  31. res = intersect1d(a, a)
  32. assert_array_equal(res, a)
  33. res = intersect1d([1, 2, 3], [1, 2, 3])
  34. assert_array_equal(res, [1, 2, 3])
  35. def test_intersect1d_indices(self):
  36. # unique inputs
  37. a = np.array([1, 2, 3, 4])
  38. b = np.array([2, 1, 4, 6])
  39. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  40. ee = np.array([1, 2, 4])
  41. assert_array_equal(c, ee)
  42. assert_array_equal(a[i1], ee)
  43. assert_array_equal(b[i2], ee)
  44. # non-unique inputs
  45. a = np.array([1, 2, 2, 3, 4, 3, 2])
  46. b = np.array([1, 8, 4, 2, 2, 3, 2, 3])
  47. c, i1, i2 = intersect1d(a, b, return_indices=True)
  48. ef = np.array([1, 2, 3, 4])
  49. assert_array_equal(c, ef)
  50. assert_array_equal(a[i1], ef)
  51. assert_array_equal(b[i2], ef)
  52. # non1d, unique inputs
  53. a = np.array([[2, 4, 5, 6], [7, 8, 1, 15]])
  54. b = np.array([[3, 2, 7, 6], [10, 12, 8, 9]])
  55. c, i1, i2 = intersect1d(a, b, assume_unique=True, return_indices=True)
  56. ui1 = np.unravel_index(i1, a.shape)
  57. ui2 = np.unravel_index(i2, b.shape)
  58. ea = np.array([2, 6, 7, 8])
  59. assert_array_equal(ea, a[ui1])
  60. assert_array_equal(ea, b[ui2])
  61. # non1d, not assumed to be uniqueinputs
  62. a = np.array([[2, 4, 5, 6, 6], [4, 7, 8, 7, 2]])
  63. b = np.array([[3, 2, 7, 7], [10, 12, 8, 7]])
  64. c, i1, i2 = intersect1d(a, b, return_indices=True)
  65. ui1 = np.unravel_index(i1, a.shape)
  66. ui2 = np.unravel_index(i2, b.shape)
  67. ea = np.array([2, 7, 8])
  68. assert_array_equal(ea, a[ui1])
  69. assert_array_equal(ea, b[ui2])
  70. def test_setxor1d(self):
  71. a = np.array([5, 7, 1, 2])
  72. b = np.array([2, 4, 3, 1, 5])
  73. ec = np.array([3, 4, 7])
  74. c = setxor1d(a, b)
  75. assert_array_equal(c, ec)
  76. a = np.array([1, 2, 3])
  77. b = np.array([6, 5, 4])
  78. ec = np.array([1, 2, 3, 4, 5, 6])
  79. c = setxor1d(a, b)
  80. assert_array_equal(c, ec)
  81. a = np.array([1, 8, 2, 3])
  82. b = np.array([6, 5, 4, 8])
  83. ec = np.array([1, 2, 3, 4, 5, 6])
  84. c = setxor1d(a, b)
  85. assert_array_equal(c, ec)
  86. assert_array_equal([], setxor1d([], []))
  87. def test_ediff1d(self):
  88. zero_elem = np.array([])
  89. one_elem = np.array([1])
  90. two_elem = np.array([1, 2])
  91. assert_array_equal([], ediff1d(zero_elem))
  92. assert_array_equal([0], ediff1d(zero_elem, to_begin=0))
  93. assert_array_equal([0], ediff1d(zero_elem, to_end=0))
  94. assert_array_equal([-1, 0], ediff1d(zero_elem, to_begin=-1, to_end=0))
  95. assert_array_equal([], ediff1d(one_elem))
  96. assert_array_equal([1], ediff1d(two_elem))
  97. assert_array_equal([7, 1, 9], ediff1d(two_elem, to_begin=7, to_end=9))
  98. assert_array_equal([5, 6, 1, 7, 8],
  99. ediff1d(two_elem, to_begin=[5, 6], to_end=[7, 8]))
  100. assert_array_equal([1, 9], ediff1d(two_elem, to_end=9))
  101. assert_array_equal([1, 7, 8], ediff1d(two_elem, to_end=[7, 8]))
  102. assert_array_equal([7, 1], ediff1d(two_elem, to_begin=7))
  103. assert_array_equal([5, 6, 1], ediff1d(two_elem, to_begin=[5, 6]))
  @pytest.mark.parametrize("ary, prepend, append, expected", [
      # should fail because trying to cast
      # np.nan standard floating point value
      # into an integer array:
      (np.array([1, 2, 3], dtype=np.int64),
       None,
       np.nan,
       'to_end'),
      # should fail because attempting
      # to downcast to int type:
      (np.array([1, 2, 3], dtype=np.int64),
       np.array([5, 7, 2], dtype=np.float32),
       None,
       'to_begin'),
      # should fail because attempting to cast
      # two special floating point values
      # to integers (on both sides of ary),
      # `to_begin` is in the error message as the impl checks this first:
      (np.array([1., 3., 9.], dtype=np.int8),
       np.nan,
       np.nan,
       'to_begin'),
      ])
  def test_ediff1d_forbidden_type_casts(self, ary, prepend, append, expected):
      """Check ediff1d raises TypeError naming the offending argument.

      ``expected`` is the argument name (``to_begin`` or ``to_end``) that
      must appear in the error message.
      """
      # verify resolution of gh-11490

      # specifically, raise an appropriate
      # Exception when attempting to append or
      # prepend with an incompatible type
      msg = 'dtype of `{}` must be compatible'.format(expected)
      with assert_raises_regex(TypeError, msg):
          ediff1d(ary=ary,
                  to_end=append,
                  to_begin=prepend)
  @pytest.mark.parametrize(
      "ary,prepend,append,expected",
      [
       (np.array([1, 2, 3], dtype=np.int16),
        2**16,  # will be cast to int16 under same kind rule.
        2**16 + 4,
        np.array([0, 1, 1, 4], dtype=np.int16)),
       (np.array([1, 2, 3], dtype=np.float32),
        np.array([5], dtype=np.float64),
        None,
        np.array([5, 1, 1], dtype=np.float32)),
       (np.array([1, 2, 3], dtype=np.int32),
        0,
        0,
        np.array([0, 1, 1, 0], dtype=np.int32)),
       (np.array([1, 2, 3], dtype=np.int64),
        3,
        -9,
        np.array([3, 1, 1, -9], dtype=np.int64)),
      ]
  )
  def test_ediff1d_scalar_handling(self, ary, prepend, append, expected):
      """Check values and dtype of np.ediff1d for the parametrized
      ``to_begin`` / ``to_end`` arguments."""
      # maintain backwards-compatibility
      # of scalar prepend / append behavior
      # in ediff1d following fix for gh-11490
      actual = np.ediff1d(ary=ary,
                          to_end=append,
                          to_begin=prepend)
      assert_equal(actual, expected)
      # the result must keep the dtype of the expected array
      assert actual.dtype == expected.dtype
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_isin(self, kind):
      """Compare isin against a slow element-wise reference for each kind."""
      # the tests for in1d cover most of isin's behavior
      # if in1d is removed, would need to change those tests to test
      # isin instead.
      def _isin_slow(a, b):
          # reference implementation: plain Python membership test
          b = np.asarray(b).flatten().tolist()
          return a in b
      # argument 1 (``b``) is excluded so it is passed whole, not vectorized
      isin_slow = np.vectorize(_isin_slow, otypes=[bool], excluded={1})

      def assert_isin_equal(a, b):
          x = isin(a, b, kind=kind)
          y = isin_slow(a, b)
          assert_array_equal(x, y)

      # multidimensional arrays in both arguments
      a = np.arange(24).reshape([2, 3, 4])
      b = np.array([[10, 20, 30], [0, 1, 3], [11, 22, 33]])
      assert_isin_equal(a, b)

      # array-likes as both arguments
      c = [(9, 8), (7, 6)]
      d = (9, 7)
      assert_isin_equal(c, d)

      # zero-d array:
      f = np.array(3)
      assert_isin_equal(f, b)
      assert_isin_equal(a, f)
      assert_isin_equal(f, f)

      # scalar:
      assert_isin_equal(5, b)
      assert_isin_equal(a, 6)
      assert_isin_equal(5, 6)

      # empty array-like:
      if kind != "table":
          # An empty list will become float64,
          # which is invalid for kind="table"
          x = []
          assert_isin_equal(x, b)
          assert_isin_equal(a, x)
          assert_isin_equal(x, x)

      # empty array with various types:
      for dtype in [bool, np.int64, np.float64]:
          if kind == "table" and dtype == np.float64:
              continue

          if dtype in {np.int64, np.float64}:
              ar = np.array([10, 20, 30], dtype=dtype)
          elif dtype in {bool}:
              ar = np.array([True, False, False])

          empty_array = np.array([], dtype=dtype)

          assert_isin_equal(empty_array, ar)
          assert_isin_equal(ar, empty_array)
          assert_isin_equal(empty_array, empty_array)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d(self, kind):
      """Check in1d results for list and array inputs of several sizes."""
      # we use two different sizes for the b array here to test the
      # two different paths in in1d().
      for mult in (1, 10):
          # One check without np.array to make sure lists are handled
          # correctly
          a = [5, 7, 1, 2]
          b = [2, 4, 3, 1, 5] * mult
          ec = np.array([True, False, True, True])
          c = in1d(a, b, assume_unique=True, kind=kind)
          assert_array_equal(c, ec)

          a[0] = 8
          ec = np.array([False, False, True, True])
          c = in1d(a, b, assume_unique=True, kind=kind)
          assert_array_equal(c, ec)

          a[0], a[3] = 4, 8
          ec = np.array([True, False, True, False])
          c = in1d(a, b, assume_unique=True, kind=kind)
          assert_array_equal(c, ec)

          a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
          b = [2, 3, 4] * mult
          ec = [False, True, False, True, True, True, True, True, True,
                False, True, False, False, False]
          c = in1d(a, b, kind=kind)
          assert_array_equal(c, ec)

          b = b + [5, 5, 4] * mult
          ec = [True, True, True, True, True, True, True, True, True, True,
                True, False, True, True]
          c = in1d(a, b, kind=kind)
          assert_array_equal(c, ec)

          a = np.array([5, 7, 1, 2])
          b = np.array([2, 4, 3, 1, 5] * mult)
          ec = np.array([True, False, True, True])
          c = in1d(a, b, kind=kind)
          assert_array_equal(c, ec)

          a = np.array([5, 7, 1, 1, 2])
          b = np.array([2, 4, 3, 3, 1, 5] * mult)
          ec = np.array([True, False, True, True, True])
          c = in1d(a, b, kind=kind)
          assert_array_equal(c, ec)

          a = np.array([5, 5])
          b = np.array([2, 2] * mult)
          ec = np.array([False, False])
          c = in1d(a, b, kind=kind)
          assert_array_equal(c, ec)

      a = np.array([5])
      b = np.array([2])
      ec = np.array([False])
      c = in1d(a, b, kind=kind)
      assert_array_equal(c, ec)

      # the empty-input check is only run for the non-table kinds
      if kind in {None, "sort"}:
          assert_array_equal(in1d([], [], kind=kind), [])
  273. def test_in1d_char_array(self):
  274. a = np.array(['a', 'b', 'c', 'd', 'e', 'c', 'e', 'b'])
  275. b = np.array(['a', 'c'])
  276. ec = np.array([True, False, True, False, False, True, False, False])
  277. c = in1d(a, b)
  278. assert_array_equal(c, ec)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d_invert(self, kind):
      """Test in1d's invert parameter"""
      # We use two different sizes for the b array here to test the
      # two different paths in in1d().
      for mult in (1, 10):
          a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5])
          b = [2, 3, 4] * mult
          assert_array_equal(np.invert(in1d(a, b, kind=kind)),
                             in1d(a, b, invert=True, kind=kind))

      # float:
      if kind in {None, "sort"}:
          for mult in (1, 10):
              a = np.array([5, 4, 5, 3, 4, 4, 3, 4, 3, 5, 2, 1, 5, 5],
                           dtype=np.float32)
              b = [2, 3, 4] * mult
              b = np.array(b, dtype=np.float32)
              assert_array_equal(np.invert(in1d(a, b, kind=kind)),
                                 in1d(a, b, invert=True, kind=kind))
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d_ravel(self, kind):
      """Check in1d returns a flat result for 2-D inputs."""
      # Test that in1d ravels its input arrays. This is not documented
      # behavior however. The test is to ensure consistency.
      a = np.arange(6).reshape(2, 3)
      b = np.arange(3, 9).reshape(3, 2)
      long_b = np.arange(3, 63).reshape(30, 2)
      ec = np.array([False, False, False, True, True, True])

      assert_array_equal(in1d(a, b, assume_unique=True, kind=kind),
                         ec)
      assert_array_equal(in1d(a, b, assume_unique=False,
                              kind=kind),
                         ec)
      assert_array_equal(in1d(a, long_b, assume_unique=True,
                              kind=kind),
                         ec)
      assert_array_equal(in1d(a, long_b, assume_unique=False,
                              kind=kind),
                         ec)
  317. def test_in1d_hit_alternate_algorithm(self):
  318. """Hit the standard isin code with integers"""
  319. # Need extreme range to hit standard code
  320. # This hits it without the use of kind='table'
  321. a = np.array([5, 4, 5, 3, 4, 4, 1e9], dtype=np.int64)
  322. b = np.array([2, 3, 4, 1e9], dtype=np.int64)
  323. expected = np.array([0, 1, 0, 1, 1, 1, 1], dtype=bool)
  324. assert_array_equal(expected, in1d(a, b))
  325. assert_array_equal(np.invert(expected), in1d(a, b, invert=True))
  326. a = np.array([5, 7, 1, 2], dtype=np.int64)
  327. b = np.array([2, 4, 3, 1, 5, 1e9], dtype=np.int64)
  328. ec = np.array([True, False, True, True])
  329. c = in1d(a, b, assume_unique=True)
  330. assert_array_equal(c, ec)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d_boolean(self, kind):
      """Test that in1d works for boolean input"""
      a = np.array([True, False])
      b = np.array([False, False, False])
      expected = np.array([False, True])
      assert_array_equal(expected,
                         in1d(a, b, kind=kind))
      assert_array_equal(np.invert(expected),
                         in1d(a, b, invert=True, kind=kind))
  @pytest.mark.parametrize("kind", [None, "sort"])
  def test_in1d_timedelta(self, kind):
      """Test that in1d works for timedelta input"""
      # seeded generator keeps the inputs reproducible
      rstate = np.random.RandomState(0)
      a = rstate.randint(0, 100, size=10)
      b = rstate.randint(0, 100, size=10)
      # the integer result is the reference for the timedelta result
      truth = in1d(a, b)
      a_timedelta = a.astype("timedelta64[s]")
      b_timedelta = b.astype("timedelta64[s]")
      assert_array_equal(truth, in1d(a_timedelta, b_timedelta, kind=kind))
  351. def test_in1d_table_timedelta_fails(self):
  352. a = np.array([0, 1, 2], dtype="timedelta64[s]")
  353. b = a
  354. # Make sure it raises a value error:
  355. with pytest.raises(ValueError):
  356. in1d(a, b, kind="table")
  @pytest.mark.parametrize(
      "dtype1,dtype2",
      [
          (np.int8, np.int16),
          (np.int16, np.int8),
          (np.uint8, np.uint16),
          (np.uint16, np.uint8),
          (np.uint8, np.int16),
          (np.int16, np.uint8),
      ]
  )
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d_mixed_dtype(self, dtype1, dtype2, kind):
      """Test that in1d works as expected for mixed dtype input."""
      is_dtype2_signed = np.issubdtype(dtype2, np.signedinteger)
      ar1 = np.array([0, 0, 1, 1], dtype=dtype1)

      if is_dtype2_signed:
          ar2 = np.array([-128, 0, 127], dtype=dtype2)
      else:
          ar2 = np.array([127, 0, 255], dtype=dtype2)

      expected = np.array([True, True, False, False])

      # the table kind is expected to fail for the int8/int16 pairings
      expect_failure = kind == "table" and any((
          dtype1 == np.int8 and dtype2 == np.int16,
          dtype1 == np.int16 and dtype2 == np.int8
      ))

      if expect_failure:
          with pytest.raises(RuntimeError, match="exceed the maximum"):
              in1d(ar1, ar2, kind=kind)
      else:
          assert_array_equal(in1d(ar1, ar2, kind=kind), expected)
  @pytest.mark.parametrize("kind", [None, "sort", "table"])
  def test_in1d_mixed_boolean(self, kind):
      """Test that in1d works as expected for bool/int input."""
      for dtype in np.typecodes["AllInteger"]:
          a = np.array([True, False, False], dtype=bool)
          b = np.array([0, 0, 0, 0], dtype=dtype)
          expected = np.array([False, True, True], dtype=bool)
          assert_array_equal(in1d(a, b, kind=kind), expected)

          # swap the roles of the boolean and integer arrays
          a, b = b, a
          expected = np.array([True, True, True, True], dtype=bool)
          assert_array_equal(in1d(a, b, kind=kind), expected)
  398. def test_in1d_first_array_is_object(self):
  399. ar1 = [None]
  400. ar2 = np.array([1]*10)
  401. expected = np.array([False])
  402. result = np.in1d(ar1, ar2)
  403. assert_array_equal(result, expected)
  404. def test_in1d_second_array_is_object(self):
  405. ar1 = 1
  406. ar2 = np.array([None]*10)
  407. expected = np.array([False])
  408. result = np.in1d(ar1, ar2)
  409. assert_array_equal(result, expected)
  410. def test_in1d_both_arrays_are_object(self):
  411. ar1 = [None]
  412. ar2 = np.array([None]*10)
  413. expected = np.array([True])
  414. result = np.in1d(ar1, ar2)
  415. assert_array_equal(result, expected)
  416. def test_in1d_both_arrays_have_structured_dtype(self):
  417. # Test arrays of a structured data type containing an integer field
  418. # and a field of dtype `object` allowing for arbitrary Python objects
  419. dt = np.dtype([('field1', int), ('field2', object)])
  420. ar1 = np.array([(1, None)], dtype=dt)
  421. ar2 = np.array([(1, None)]*10, dtype=dt)
  422. expected = np.array([True])
  423. result = np.in1d(ar1, ar2)
  424. assert_array_equal(result, expected)
  425. def test_in1d_with_arrays_containing_tuples(self):
  426. ar1 = np.array([(1,), 2], dtype=object)
  427. ar2 = np.array([(1,), 2], dtype=object)
  428. expected = np.array([True, True])
  429. result = np.in1d(ar1, ar2)
  430. assert_array_equal(result, expected)
  431. result = np.in1d(ar1, ar2, invert=True)
  432. assert_array_equal(result, np.invert(expected))
  433. # An integer is added at the end of the array to make sure
  434. # that the array builder will create the array with tuples
  435. # and after it's created the integer is removed.
  436. # There's a bug in the array constructor that doesn't handle
  437. # tuples properly and adding the integer fixes that.
  438. ar1 = np.array([(1,), (2, 1), 1], dtype=object)
  439. ar1 = ar1[:-1]
  440. ar2 = np.array([(1,), (2, 1), 1], dtype=object)
  441. ar2 = ar2[:-1]
  442. expected = np.array([True, True])
  443. result = np.in1d(ar1, ar2)
  444. assert_array_equal(result, expected)
  445. result = np.in1d(ar1, ar2, invert=True)
  446. assert_array_equal(result, np.invert(expected))
  447. ar1 = np.array([(1,), (2, 3), 1], dtype=object)
  448. ar1 = ar1[:-1]
  449. ar2 = np.array([(1,), 2], dtype=object)
  450. expected = np.array([True, False])
  451. result = np.in1d(ar1, ar2)
  452. assert_array_equal(result, expected)
  453. result = np.in1d(ar1, ar2, invert=True)
  454. assert_array_equal(result, np.invert(expected))
  455. def test_in1d_errors(self):
  456. """Test that in1d raises expected errors."""
  457. # Error 1: `kind` is not one of 'sort' 'table' or None.
  458. ar1 = np.array([1, 2, 3, 4, 5])
  459. ar2 = np.array([2, 4, 6, 8, 10])
  460. assert_raises(ValueError, in1d, ar1, ar2, kind='quicksort')
  461. # Error 2: `kind="table"` does not work for non-integral arrays.
  462. obj_ar1 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  463. obj_ar2 = np.array([1, 'a', 3, 'b', 5], dtype=object)
  464. assert_raises(ValueError, in1d, obj_ar1, obj_ar2, kind='table')
  465. for dtype in [np.int32, np.int64]:
  466. ar1 = np.array([-1, 2, 3, 4, 5], dtype=dtype)
  467. # The range of this array will overflow:
  468. overflow_ar2 = np.array([-1, np.iinfo(dtype).max], dtype=dtype)
  469. # Error 3: `kind="table"` will trigger a runtime error
  470. # if there is an integer overflow expected when computing the
  471. # range of ar2
  472. assert_raises(
  473. RuntimeError,
  474. in1d, ar1, overflow_ar2, kind='table'
  475. )
  476. # Non-error: `kind=None` will *not* trigger a runtime error
  477. # if there is an integer overflow, it will switch to
  478. # the `sort` algorithm.
  479. result = np.in1d(ar1, overflow_ar2, kind=None)
  480. assert_array_equal(result, [True] + [False] * 4)
  481. result = np.in1d(ar1, overflow_ar2, kind='sort')
  482. assert_array_equal(result, [True] + [False] * 4)
  483. def test_union1d(self):
  484. a = np.array([5, 4, 7, 1, 2])
  485. b = np.array([2, 4, 3, 3, 2, 1, 5])
  486. ec = np.array([1, 2, 3, 4, 5, 7])
  487. c = union1d(a, b)
  488. assert_array_equal(c, ec)
  489. # Tests gh-10340, arguments to union1d should be
  490. # flattened if they are not already 1D
  491. x = np.array([[0, 1, 2], [3, 4, 5]])
  492. y = np.array([0, 1, 2, 3, 4])
  493. ez = np.array([0, 1, 2, 3, 4, 5])
  494. z = union1d(x, y)
  495. assert_array_equal(z, ez)
  496. assert_array_equal([], union1d([], []))
  497. def test_setdiff1d(self):
  498. a = np.array([6, 5, 4, 7, 1, 2, 7, 4])
  499. b = np.array([2, 4, 3, 3, 2, 1, 5])
  500. ec = np.array([6, 7])
  501. c = setdiff1d(a, b)
  502. assert_array_equal(c, ec)
  503. a = np.arange(21)
  504. b = np.arange(19)
  505. ec = np.array([19, 20])
  506. c = setdiff1d(a, b)
  507. assert_array_equal(c, ec)
  508. assert_array_equal([], setdiff1d([], []))
  509. a = np.array((), np.uint32)
  510. assert_equal(setdiff1d(a, []).dtype, np.uint32)
  511. def test_setdiff1d_unique(self):
  512. a = np.array([3, 2, 1])
  513. b = np.array([7, 5, 2])
  514. expected = np.array([3, 1])
  515. actual = setdiff1d(a, b, assume_unique=True)
  516. assert_equal(actual, expected)
  517. def test_setdiff1d_char_array(self):
  518. a = np.array(['a', 'b', 'c'])
  519. b = np.array(['a', 'b', 's'])
  520. assert_array_equal(setdiff1d(a, b), np.array(['c']))
  521. def test_manyways(self):
  522. a = np.array([5, 7, 1, 2, 8])
  523. b = np.array([9, 8, 2, 4, 3, 1, 5])
  524. c1 = setxor1d(a, b)
  525. aux1 = intersect1d(a, b)
  526. aux2 = union1d(a, b)
  527. c2 = setdiff1d(aux2, aux1)
  528. assert_array_equal(c1, c2)
  529. class TestUnique:
  530. def test_unique_1d(self):
  531. def check_all(a, b, i1, i2, c, dt):
  532. base_msg = 'check {0} failed for type {1}'
  533. msg = base_msg.format('values', dt)
  534. v = unique(a)
  535. assert_array_equal(v, b, msg)
  536. msg = base_msg.format('return_index', dt)
  537. v, j = unique(a, True, False, False)
  538. assert_array_equal(v, b, msg)
  539. assert_array_equal(j, i1, msg)
  540. msg = base_msg.format('return_inverse', dt)
  541. v, j = unique(a, False, True, False)
  542. assert_array_equal(v, b, msg)
  543. assert_array_equal(j, i2, msg)
  544. msg = base_msg.format('return_counts', dt)
  545. v, j = unique(a, False, False, True)
  546. assert_array_equal(v, b, msg)
  547. assert_array_equal(j, c, msg)
  548. msg = base_msg.format('return_index and return_inverse', dt)
  549. v, j1, j2 = unique(a, True, True, False)
  550. assert_array_equal(v, b, msg)
  551. assert_array_equal(j1, i1, msg)
  552. assert_array_equal(j2, i2, msg)
  553. msg = base_msg.format('return_index and return_counts', dt)
  554. v, j1, j2 = unique(a, True, False, True)
  555. assert_array_equal(v, b, msg)
  556. assert_array_equal(j1, i1, msg)
  557. assert_array_equal(j2, c, msg)
  558. msg = base_msg.format('return_inverse and return_counts', dt)
  559. v, j1, j2 = unique(a, False, True, True)
  560. assert_array_equal(v, b, msg)
  561. assert_array_equal(j1, i2, msg)
  562. assert_array_equal(j2, c, msg)
  563. msg = base_msg.format(('return_index, return_inverse '
  564. 'and return_counts'), dt)
  565. v, j1, j2, j3 = unique(a, True, True, True)
  566. assert_array_equal(v, b, msg)
  567. assert_array_equal(j1, i1, msg)
  568. assert_array_equal(j2, i2, msg)
  569. assert_array_equal(j3, c, msg)
  570. a = [5, 7, 1, 2, 1, 5, 7]*10
  571. b = [1, 2, 5, 7]
  572. i1 = [2, 3, 0, 1]
  573. i2 = [2, 3, 0, 1, 0, 2, 3]*10
  574. c = np.multiply([2, 1, 2, 2], 10)
  575. # test for numeric arrays
  576. types = []
  577. types.extend(np.typecodes['AllInteger'])
  578. types.extend(np.typecodes['AllFloat'])
  579. types.append('datetime64[D]')
  580. types.append('timedelta64[D]')
  581. for dt in types:
  582. aa = np.array(a, dt)
  583. bb = np.array(b, dt)
  584. check_all(aa, bb, i1, i2, c, dt)
  585. # test for object arrays
  586. dt = 'O'
  587. aa = np.empty(len(a), dt)
  588. aa[:] = a
  589. bb = np.empty(len(b), dt)
  590. bb[:] = b
  591. check_all(aa, bb, i1, i2, c, dt)
  592. # test for structured arrays
  593. dt = [('', 'i'), ('', 'i')]
  594. aa = np.array(list(zip(a, a)), dt)
  595. bb = np.array(list(zip(b, b)), dt)
  596. check_all(aa, bb, i1, i2, c, dt)
  597. # test for ticket #2799
  598. aa = [1. + 0.j, 1 - 1.j, 1]
  599. assert_array_equal(np.unique(aa), [1. - 1.j, 1. + 0.j])
  600. # test for ticket #4785
  601. a = [(1, 2), (1, 2), (2, 3)]
  602. unq = [1, 2, 3]
  603. inv = [0, 1, 0, 1, 1, 2]
  604. a1 = unique(a)
  605. assert_array_equal(a1, unq)
  606. a2, a2_inv = unique(a, return_inverse=True)
  607. assert_array_equal(a2, unq)
  608. assert_array_equal(a2_inv, inv)
  609. # test for chararrays with return_inverse (gh-5099)
  610. a = np.chararray(5)
  611. a[...] = ''
  612. a2, a2_inv = np.unique(a, return_inverse=True)
  613. assert_array_equal(a2_inv, np.zeros(5))
  614. # test for ticket #9137
  615. a = []
  616. a1_idx = np.unique(a, return_index=True)[1]
  617. a2_inv = np.unique(a, return_inverse=True)[1]
  618. a3_idx, a3_inv = np.unique(a, return_index=True,
  619. return_inverse=True)[1:]
  620. assert_equal(a1_idx.dtype, np.intp)
  621. assert_equal(a2_inv.dtype, np.intp)
  622. assert_equal(a3_idx.dtype, np.intp)
  623. assert_equal(a3_inv.dtype, np.intp)
  624. # test for ticket 2111 - float
  625. a = [2.0, np.nan, 1.0, np.nan]
  626. ua = [1.0, 2.0, np.nan]
  627. ua_idx = [2, 0, 1]
  628. ua_inv = [1, 2, 0, 2]
  629. ua_cnt = [1, 1, 2]
  630. assert_equal(np.unique(a), ua)
  631. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  632. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  633. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  634. # test for ticket 2111 - complex
  635. a = [2.0-1j, np.nan, 1.0+1j, complex(0.0, np.nan), complex(1.0, np.nan)]
  636. ua = [1.0+1j, 2.0-1j, complex(0.0, np.nan)]
  637. ua_idx = [2, 0, 3]
  638. ua_inv = [1, 2, 0, 2, 2]
  639. ua_cnt = [1, 1, 3]
  640. assert_equal(np.unique(a), ua)
  641. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  642. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  643. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  644. # test for ticket 2111 - datetime64
  645. nat = np.datetime64('nat')
  646. a = [np.datetime64('2020-12-26'), nat, np.datetime64('2020-12-24'), nat]
  647. ua = [np.datetime64('2020-12-24'), np.datetime64('2020-12-26'), nat]
  648. ua_idx = [2, 0, 1]
  649. ua_inv = [1, 2, 0, 2]
  650. ua_cnt = [1, 1, 2]
  651. assert_equal(np.unique(a), ua)
  652. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  653. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  654. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  655. # test for ticket 2111 - timedelta
  656. nat = np.timedelta64('nat')
  657. a = [np.timedelta64(1, 'D'), nat, np.timedelta64(1, 'h'), nat]
  658. ua = [np.timedelta64(1, 'h'), np.timedelta64(1, 'D'), nat]
  659. ua_idx = [2, 0, 1]
  660. ua_inv = [1, 2, 0, 2]
  661. ua_cnt = [1, 1, 2]
  662. assert_equal(np.unique(a), ua)
  663. assert_equal(np.unique(a, return_index=True), (ua, ua_idx))
  664. assert_equal(np.unique(a, return_inverse=True), (ua, ua_inv))
  665. assert_equal(np.unique(a, return_counts=True), (ua, ua_cnt))
  666. # test for gh-19300
  667. all_nans = [np.nan] * 4
  668. ua = [np.nan]
  669. ua_idx = [0]
  670. ua_inv = [0, 0, 0, 0]
  671. ua_cnt = [4]
  672. assert_equal(np.unique(all_nans), ua)
  673. assert_equal(np.unique(all_nans, return_index=True), (ua, ua_idx))
  674. assert_equal(np.unique(all_nans, return_inverse=True), (ua, ua_inv))
  675. assert_equal(np.unique(all_nans, return_counts=True), (ua, ua_cnt))
  676. def test_unique_axis_errors(self):
  677. assert_raises(TypeError, self._run_axis_tests, object)
  678. assert_raises(TypeError, self._run_axis_tests,
  679. [('a', int), ('b', object)])
  680. assert_raises(np.AxisError, unique, np.arange(10), axis=2)
  681. assert_raises(np.AxisError, unique, np.arange(10), axis=-2)
  682. def test_unique_axis_list(self):
  683. msg = "Unique failed on list of lists"
  684. inp = [[0, 1, 0], [0, 1, 0]]
  685. inp_arr = np.asarray(inp)
  686. assert_array_equal(unique(inp, axis=0), unique(inp_arr, axis=0), msg)
  687. assert_array_equal(unique(inp, axis=1), unique(inp_arr, axis=1), msg)
  688. def test_unique_axis(self):
  689. types = []
  690. types.extend(np.typecodes['AllInteger'])
  691. types.extend(np.typecodes['AllFloat'])
  692. types.append('datetime64[D]')
  693. types.append('timedelta64[D]')
  694. types.append([('a', int), ('b', int)])
  695. types.append([('a', int), ('b', float)])
  696. for dtype in types:
  697. self._run_axis_tests(dtype)
  698. msg = 'Non-bitwise-equal booleans test failed'
  699. data = np.arange(10, dtype=np.uint8).reshape(-1, 2).view(bool)
  700. result = np.array([[False, True], [True, True]], dtype=bool)
  701. assert_array_equal(unique(data, axis=0), result, msg)
  702. msg = 'Negative zero equality test failed'
  703. data = np.array([[-0.0, 0.0], [0.0, -0.0], [-0.0, 0.0], [0.0, -0.0]])
  704. result = np.array([[-0.0, 0.0]])
  705. assert_array_equal(unique(data, axis=0), result, msg)
  @pytest.mark.parametrize("axis", [0, -1])
  def test_unique_1d_with_axis(self, axis):
      """Check unique on a 1-D array when an explicit axis is given."""
      x = np.array([4, 3, 2, 3, 2, 1, 2, 2])
      uniq = unique(x, axis=axis)
      assert_array_equal(uniq, [1, 2, 3, 4])
  711. def test_unique_axis_zeros(self):
  712. # issue 15559
  713. single_zero = np.empty(shape=(2, 0), dtype=np.int8)
  714. uniq, idx, inv, cnt = unique(single_zero, axis=0, return_index=True,
  715. return_inverse=True, return_counts=True)
  716. # there's 1 element of shape (0,) along axis 0
  717. assert_equal(uniq.dtype, single_zero.dtype)
  718. assert_array_equal(uniq, np.empty(shape=(1, 0)))
  719. assert_array_equal(idx, np.array([0]))
  720. assert_array_equal(inv, np.array([0, 0]))
  721. assert_array_equal(cnt, np.array([2]))
  722. # there's 0 elements of shape (2,) along axis 1
  723. uniq, idx, inv, cnt = unique(single_zero, axis=1, return_index=True,
  724. return_inverse=True, return_counts=True)
  725. assert_equal(uniq.dtype, single_zero.dtype)
  726. assert_array_equal(uniq, np.empty(shape=(2, 0)))
  727. assert_array_equal(idx, np.array([]))
  728. assert_array_equal(inv, np.array([]))
  729. assert_array_equal(cnt, np.array([]))
  730. # test a "complicated" shape
  731. shape = (0, 2, 0, 3, 0, 4, 0)
  732. multiple_zeros = np.empty(shape=shape)
  733. for axis in range(len(shape)):
  734. expected_shape = list(shape)
  735. if shape[axis] == 0:
  736. expected_shape[axis] = 0
  737. else:
  738. expected_shape[axis] = 1
  739. assert_array_equal(unique(multiple_zeros, axis=axis),
  740. np.empty(shape=expected_shape))
  741. def test_unique_masked(self):
  742. # issue 8664
  743. x = np.array([64, 0, 1, 2, 3, 63, 63, 0, 0, 0, 1, 2, 0, 63, 0],
  744. dtype='uint8')
  745. y = np.ma.masked_equal(x, 0)
  746. v = np.unique(y)
  747. v2, i, c = np.unique(y, return_index=True, return_counts=True)
  748. msg = 'Unique returned different results when asked for index'
  749. assert_array_equal(v.data, v2.data, msg)
  750. assert_array_equal(v.mask, v2.mask, msg)
  751. def test_unique_sort_order_with_axis(self):
  752. # These tests fail if sorting along axis is done by treating subarrays
  753. # as unsigned byte strings. See gh-10495.
  754. fmt = "sort order incorrect for integer type '%s'"
  755. for dt in 'bhilq':
  756. a = np.array([[-1], [0]], dt)
  757. b = np.unique(a, axis=0)
  758. assert_array_equal(a, b, fmt % dt)
  759. def _run_axis_tests(self, dtype):
  760. data = np.array([[0, 1, 0, 0],
  761. [1, 0, 0, 0],
  762. [0, 1, 0, 0],
  763. [1, 0, 0, 0]]).astype(dtype)
  764. msg = 'Unique with 1d array and axis=0 failed'
  765. result = np.array([0, 1])
  766. assert_array_equal(unique(data), result.astype(dtype), msg)
  767. msg = 'Unique with 2d array and axis=0 failed'
  768. result = np.array([[0, 1, 0, 0], [1, 0, 0, 0]])
  769. assert_array_equal(unique(data, axis=0), result.astype(dtype), msg)
  770. msg = 'Unique with 2d array and axis=1 failed'
  771. result = np.array([[0, 0, 1], [0, 1, 0], [0, 0, 1], [0, 1, 0]])
  772. assert_array_equal(unique(data, axis=1), result.astype(dtype), msg)
  773. msg = 'Unique with 3d array and axis=2 failed'
  774. data3d = np.array([[[1, 1],
  775. [1, 0]],
  776. [[0, 1],
  777. [0, 0]]]).astype(dtype)
  778. result = np.take(data3d, [1, 0], axis=2)
  779. assert_array_equal(unique(data3d, axis=2), result, msg)
  780. uniq, idx, inv, cnt = unique(data, axis=0, return_index=True,
  781. return_inverse=True, return_counts=True)
  782. msg = "Unique's return_index=True failed with axis=0"
  783. assert_array_equal(data[idx], uniq, msg)
  784. msg = "Unique's return_inverse=True failed with axis=0"
  785. assert_array_equal(uniq[inv], data)
  786. msg = "Unique's return_counts=True failed with axis=0"
  787. assert_array_equal(cnt, np.array([2, 2]), msg)
  788. uniq, idx, inv, cnt = unique(data, axis=1, return_index=True,
  789. return_inverse=True, return_counts=True)
  790. msg = "Unique's return_index=True failed with axis=1"
  791. assert_array_equal(data[:, idx], uniq)
  792. msg = "Unique's return_inverse=True failed with axis=1"
  793. assert_array_equal(uniq[:, inv], data)
  794. msg = "Unique's return_counts=True failed with axis=1"
  795. assert_array_equal(cnt, np.array([2, 1, 1]), msg)
  796. def test_unique_nanequals(self):
  797. # issue 20326
  798. a = np.array([1, 1, np.nan, np.nan, np.nan])
  799. unq = np.unique(a)
  800. not_unq = np.unique(a, equal_nan=False)
  801. assert_array_equal(unq, np.array([1, np.nan]))
  802. assert_array_equal(not_unq, np.array([1, np.nan, np.nan, np.nan]))