test_hierarchy.py 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121
  1. #
  2. # Author: Damian Eads
  3. # Date: April 17, 2008
  4. #
  5. # Copyright (C) 2008 Damian Eads
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions
  9. # are met:
  10. #
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice, this list of conditions and the following disclaimer.
  13. #
  14. # 2. Redistributions in binary form must reproduce the above
  15. # copyright notice, this list of conditions and the following
  16. # disclaimer in the documentation and/or other materials provided
  17. # with the distribution.
  18. #
  19. # 3. The name of the author may not be used to endorse or promote
  20. # products derived from this software without specific prior
  21. # written permission.
  22. #
  23. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  24. # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  25. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  27. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  29. # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  31. # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. import numpy as np
  35. from numpy.testing import assert_allclose, assert_equal, assert_, assert_warns
  36. import pytest
  37. from pytest import raises as assert_raises
  38. import scipy.cluster.hierarchy
  39. from scipy.cluster.hierarchy import (
  40. ClusterWarning, linkage, from_mlab_linkage, to_mlab_linkage,
  41. num_obs_linkage, inconsistent, cophenet, fclusterdata, fcluster,
  42. is_isomorphic, single, leaders,
  43. correspond, is_monotonic, maxdists, maxinconsts, maxRstat,
  44. is_valid_linkage, is_valid_im, to_tree, leaves_list, dendrogram,
  45. set_link_color_palette, cut_tree, optimal_leaf_ordering,
  46. _order_cluster_tree, _hierarchy, _LINKAGE_METHODS)
  47. from scipy.spatial.distance import pdist
  48. from scipy.cluster._hierarchy import Heap
  49. from . import hierarchy_test_data
  50. # Matplotlib is not a scipy dependency but is optionally used in dendrogram, so
  51. # check if it's available
  52. try:
  53. import matplotlib
  54. # and set the backend to be Agg (no gui)
  55. matplotlib.use('Agg')
  56. # before importing pyplot
  57. import matplotlib.pyplot as plt
  58. have_matplotlib = True
  59. except Exception:
  60. have_matplotlib = False
  61. class TestLinkage:
  62. def test_linkage_non_finite_elements_in_distance_matrix(self):
  63. # Tests linkage(Y) where Y contains a non-finite element (e.g. NaN or Inf).
  64. # Exception expected.
  65. y = np.zeros((6,))
  66. y[0] = np.nan
  67. assert_raises(ValueError, linkage, y)
  68. def test_linkage_empty_distance_matrix(self):
  69. # Tests linkage(Y) where Y is a 0x4 linkage matrix. Exception expected.
  70. y = np.zeros((0,))
  71. assert_raises(ValueError, linkage, y)
  72. def test_linkage_tdist(self):
  73. for method in ['single', 'complete', 'average', 'weighted']:
  74. self.check_linkage_tdist(method)
  75. def check_linkage_tdist(self, method):
  76. # Tests linkage(Y, method) on the tdist data set.
  77. Z = linkage(hierarchy_test_data.ytdist, method)
  78. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_' + method)
  79. assert_allclose(Z, expectedZ, atol=1e-10)
  80. def test_linkage_X(self):
  81. for method in ['centroid', 'median', 'ward']:
  82. self.check_linkage_q(method)
  83. def check_linkage_q(self, method):
  84. # Tests linkage(Y, method) on the Q data set.
  85. Z = linkage(hierarchy_test_data.X, method)
  86. expectedZ = getattr(hierarchy_test_data, 'linkage_X_' + method)
  87. assert_allclose(Z, expectedZ, atol=1e-06)
  88. y = scipy.spatial.distance.pdist(hierarchy_test_data.X,
  89. metric="euclidean")
  90. Z = linkage(y, method)
  91. assert_allclose(Z, expectedZ, atol=1e-06)
  92. def test_compare_with_trivial(self):
  93. rng = np.random.RandomState(0)
  94. n = 20
  95. X = rng.rand(n, 2)
  96. d = pdist(X)
  97. for method, code in _LINKAGE_METHODS.items():
  98. Z_trivial = _hierarchy.linkage(d, n, code)
  99. Z = linkage(d, method)
  100. assert_allclose(Z_trivial, Z, rtol=1e-14, atol=1e-15)
  101. def test_optimal_leaf_ordering(self):
  102. Z = linkage(hierarchy_test_data.ytdist, optimal_ordering=True)
  103. expectedZ = getattr(hierarchy_test_data, 'linkage_ytdist_single_olo')
  104. assert_allclose(Z, expectedZ, atol=1e-10)
  105. class TestLinkageTies:
  106. _expectations = {
  107. 'single': np.array([[0, 1, 1.41421356, 2],
  108. [2, 3, 1.41421356, 3]]),
  109. 'complete': np.array([[0, 1, 1.41421356, 2],
  110. [2, 3, 2.82842712, 3]]),
  111. 'average': np.array([[0, 1, 1.41421356, 2],
  112. [2, 3, 2.12132034, 3]]),
  113. 'weighted': np.array([[0, 1, 1.41421356, 2],
  114. [2, 3, 2.12132034, 3]]),
  115. 'centroid': np.array([[0, 1, 1.41421356, 2],
  116. [2, 3, 2.12132034, 3]]),
  117. 'median': np.array([[0, 1, 1.41421356, 2],
  118. [2, 3, 2.12132034, 3]]),
  119. 'ward': np.array([[0, 1, 1.41421356, 2],
  120. [2, 3, 2.44948974, 3]]),
  121. }
  122. def test_linkage_ties(self):
  123. for method in ['single', 'complete', 'average', 'weighted', 'centroid', 'median', 'ward']:
  124. self.check_linkage_ties(method)
  125. def check_linkage_ties(self, method):
  126. X = np.array([[-1, -1], [0, 0], [1, 1]])
  127. Z = linkage(X, method=method)
  128. expectedZ = self._expectations[method]
  129. assert_allclose(Z, expectedZ, atol=1e-06)
  130. class TestInconsistent:
  131. def test_inconsistent_tdist(self):
  132. for depth in hierarchy_test_data.inconsistent_ytdist:
  133. self.check_inconsistent_tdist(depth)
  134. def check_inconsistent_tdist(self, depth):
  135. Z = hierarchy_test_data.linkage_ytdist_single
  136. assert_allclose(inconsistent(Z, depth),
  137. hierarchy_test_data.inconsistent_ytdist[depth])
  138. class TestCopheneticDistance:
  139. def test_linkage_cophenet_tdist_Z(self):
  140. # Tests cophenet(Z) on tdist data set.
  141. expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  142. 295, 138, 219, 295, 295])
  143. Z = hierarchy_test_data.linkage_ytdist_single
  144. M = cophenet(Z)
  145. assert_allclose(M, expectedM, atol=1e-10)
  146. def test_linkage_cophenet_tdist_Z_Y(self):
  147. # Tests cophenet(Z, Y) on tdist data set.
  148. Z = hierarchy_test_data.linkage_ytdist_single
  149. (c, M) = cophenet(Z, hierarchy_test_data.ytdist)
  150. expectedM = np.array([268, 295, 255, 255, 295, 295, 268, 268, 295, 295,
  151. 295, 138, 219, 295, 295])
  152. expectedc = 0.639931296433393415057366837573
  153. assert_allclose(c, expectedc, atol=1e-10)
  154. assert_allclose(M, expectedM, atol=1e-10)
  155. class TestMLabLinkageConversion:
  156. def test_mlab_linkage_conversion_empty(self):
  157. # Tests from/to_mlab_linkage on empty linkage array.
  158. X = np.asarray([])
  159. assert_equal(from_mlab_linkage([]), X)
  160. assert_equal(to_mlab_linkage([]), X)
  161. def test_mlab_linkage_conversion_single_row(self):
  162. # Tests from/to_mlab_linkage on linkage array with single row.
  163. Z = np.asarray([[0., 1., 3., 2.]])
  164. Zm = [[1, 2, 3]]
  165. assert_equal(from_mlab_linkage(Zm), Z)
  166. assert_equal(to_mlab_linkage(Z), Zm)
  167. def test_mlab_linkage_conversion_multiple_rows(self):
  168. # Tests from/to_mlab_linkage on linkage array with multiple rows.
  169. Zm = np.asarray([[3, 6, 138], [4, 5, 219],
  170. [1, 8, 255], [2, 9, 268], [7, 10, 295]])
  171. Z = np.array([[2., 5., 138., 2.],
  172. [3., 4., 219., 2.],
  173. [0., 7., 255., 3.],
  174. [1., 8., 268., 4.],
  175. [6., 9., 295., 6.]],
  176. dtype=np.double)
  177. assert_equal(from_mlab_linkage(Zm), Z)
  178. assert_equal(to_mlab_linkage(Z), Zm)
  179. class TestFcluster:
  180. def test_fclusterdata(self):
  181. for t in hierarchy_test_data.fcluster_inconsistent:
  182. self.check_fclusterdata(t, 'inconsistent')
  183. for t in hierarchy_test_data.fcluster_distance:
  184. self.check_fclusterdata(t, 'distance')
  185. for t in hierarchy_test_data.fcluster_maxclust:
  186. self.check_fclusterdata(t, 'maxclust')
  187. def check_fclusterdata(self, t, criterion):
  188. # Tests fclusterdata(X, criterion=criterion, t=t) on a random 3-cluster data set.
  189. expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]
  190. X = hierarchy_test_data.Q_X
  191. T = fclusterdata(X, criterion=criterion, t=t)
  192. assert_(is_isomorphic(T, expectedT))
  193. def test_fcluster(self):
  194. for t in hierarchy_test_data.fcluster_inconsistent:
  195. self.check_fcluster(t, 'inconsistent')
  196. for t in hierarchy_test_data.fcluster_distance:
  197. self.check_fcluster(t, 'distance')
  198. for t in hierarchy_test_data.fcluster_maxclust:
  199. self.check_fcluster(t, 'maxclust')
  200. def check_fcluster(self, t, criterion):
  201. # Tests fcluster(Z, criterion=criterion, t=t) on a random 3-cluster data set.
  202. expectedT = getattr(hierarchy_test_data, 'fcluster_' + criterion)[t]
  203. Z = single(hierarchy_test_data.Q_X)
  204. T = fcluster(Z, criterion=criterion, t=t)
  205. assert_(is_isomorphic(T, expectedT))
  206. def test_fcluster_monocrit(self):
  207. for t in hierarchy_test_data.fcluster_distance:
  208. self.check_fcluster_monocrit(t)
  209. for t in hierarchy_test_data.fcluster_maxclust:
  210. self.check_fcluster_maxclust_monocrit(t)
  211. def check_fcluster_monocrit(self, t):
  212. expectedT = hierarchy_test_data.fcluster_distance[t]
  213. Z = single(hierarchy_test_data.Q_X)
  214. T = fcluster(Z, t, criterion='monocrit', monocrit=maxdists(Z))
  215. assert_(is_isomorphic(T, expectedT))
  216. def check_fcluster_maxclust_monocrit(self, t):
  217. expectedT = hierarchy_test_data.fcluster_maxclust[t]
  218. Z = single(hierarchy_test_data.Q_X)
  219. T = fcluster(Z, t, criterion='maxclust_monocrit', monocrit=maxdists(Z))
  220. assert_(is_isomorphic(T, expectedT))
  221. class TestLeaders:
  222. def test_leaders_single(self):
  223. # Tests leaders using a flat clustering generated by single linkage.
  224. X = hierarchy_test_data.Q_X
  225. Y = pdist(X)
  226. Z = linkage(Y)
  227. T = fcluster(Z, criterion='maxclust', t=3)
  228. Lright = (np.array([53, 55, 56]), np.array([2, 3, 1]))
  229. L = leaders(Z, T)
  230. assert_equal(L, Lright)
  231. class TestIsIsomorphic:
  232. def test_is_isomorphic_1(self):
  233. # Tests is_isomorphic on test case #1 (one flat cluster, different labellings)
  234. a = [1, 1, 1]
  235. b = [2, 2, 2]
  236. assert_(is_isomorphic(a, b))
  237. assert_(is_isomorphic(b, a))
  238. def test_is_isomorphic_2(self):
  239. # Tests is_isomorphic on test case #2 (two flat clusters, different labelings)
  240. a = [1, 7, 1]
  241. b = [2, 3, 2]
  242. assert_(is_isomorphic(a, b))
  243. assert_(is_isomorphic(b, a))
  244. def test_is_isomorphic_3(self):
  245. # Tests is_isomorphic on test case #3 (no flat clusters)
  246. a = []
  247. b = []
  248. assert_(is_isomorphic(a, b))
  249. def test_is_isomorphic_4A(self):
  250. # Tests is_isomorphic on test case #4A (3 flat clusters, different labelings, isomorphic)
  251. a = [1, 2, 3]
  252. b = [1, 3, 2]
  253. assert_(is_isomorphic(a, b))
  254. assert_(is_isomorphic(b, a))
  255. def test_is_isomorphic_4B(self):
  256. # Tests is_isomorphic on test case #4B (3 flat clusters, different labelings, nonisomorphic)
  257. a = [1, 2, 3, 3]
  258. b = [1, 3, 2, 3]
  259. assert_(is_isomorphic(a, b) == False)
  260. assert_(is_isomorphic(b, a) == False)
  261. def test_is_isomorphic_4C(self):
  262. # Tests is_isomorphic on test case #4C (3 flat clusters, different labelings, isomorphic)
  263. a = [7, 2, 3]
  264. b = [6, 3, 2]
  265. assert_(is_isomorphic(a, b))
  266. assert_(is_isomorphic(b, a))
  267. def test_is_isomorphic_5(self):
  268. # Tests is_isomorphic on test case #5 (1000 observations, 2/3/5 random
  269. # clusters, random permutation of the labeling).
  270. for nc in [2, 3, 5]:
  271. self.help_is_isomorphic_randperm(1000, nc)
  272. def test_is_isomorphic_6(self):
  273. # Tests is_isomorphic on test case #5A (1000 observations, 2/3/5 random
  274. # clusters, random permutation of the labeling, slightly
  275. # nonisomorphic.)
  276. for nc in [2, 3, 5]:
  277. self.help_is_isomorphic_randperm(1000, nc, True, 5)
  278. def test_is_isomorphic_7(self):
  279. # Regression test for gh-6271
  280. assert_(not is_isomorphic([1, 2, 3], [1, 1, 1]))
  281. def help_is_isomorphic_randperm(self, nobs, nclusters, noniso=False, nerrors=0):
  282. for k in range(3):
  283. a = np.int_(np.random.rand(nobs) * nclusters)
  284. b = np.zeros(a.size, dtype=np.int_)
  285. P = np.random.permutation(nclusters)
  286. for i in range(0, a.shape[0]):
  287. b[i] = P[a[i]]
  288. if noniso:
  289. Q = np.random.permutation(nobs)
  290. b[Q[0:nerrors]] += 1
  291. b[Q[0:nerrors]] %= nclusters
  292. assert_(is_isomorphic(a, b) == (not noniso))
  293. assert_(is_isomorphic(b, a) == (not noniso))
  294. class TestIsValidLinkage:
  295. def test_is_valid_linkage_various_size(self):
  296. for nrow, ncol, valid in [(2, 5, False), (2, 3, False),
  297. (1, 4, True), (2, 4, True)]:
  298. self.check_is_valid_linkage_various_size(nrow, ncol, valid)
  299. def check_is_valid_linkage_various_size(self, nrow, ncol, valid):
  300. # Tests is_valid_linkage(Z) with linkage matrics of various sizes
  301. Z = np.asarray([[0, 1, 3.0, 2, 5],
  302. [3, 2, 4.0, 3, 3]], dtype=np.double)
  303. Z = Z[:nrow, :ncol]
  304. assert_(is_valid_linkage(Z) == valid)
  305. if not valid:
  306. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  307. def test_is_valid_linkage_int_type(self):
  308. # Tests is_valid_linkage(Z) with integer type.
  309. Z = np.asarray([[0, 1, 3.0, 2],
  310. [3, 2, 4.0, 3]], dtype=int)
  311. assert_(is_valid_linkage(Z) == False)
  312. assert_raises(TypeError, is_valid_linkage, Z, throw=True)
  313. def test_is_valid_linkage_empty(self):
  314. # Tests is_valid_linkage(Z) with empty linkage.
  315. Z = np.zeros((0, 4), dtype=np.double)
  316. assert_(is_valid_linkage(Z) == False)
  317. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  318. def test_is_valid_linkage_4_and_up(self):
  319. # Tests is_valid_linkage(Z) on linkage on observation sets between
  320. # sizes 4 and 15 (step size 3).
  321. for i in range(4, 15, 3):
  322. y = np.random.rand(i*(i-1)//2)
  323. Z = linkage(y)
  324. assert_(is_valid_linkage(Z) == True)
  325. def test_is_valid_linkage_4_and_up_neg_index_left(self):
  326. # Tests is_valid_linkage(Z) on linkage on observation sets between
  327. # sizes 4 and 15 (step size 3) with negative indices (left).
  328. for i in range(4, 15, 3):
  329. y = np.random.rand(i*(i-1)//2)
  330. Z = linkage(y)
  331. Z[i//2,0] = -2
  332. assert_(is_valid_linkage(Z) == False)
  333. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  334. def test_is_valid_linkage_4_and_up_neg_index_right(self):
  335. # Tests is_valid_linkage(Z) on linkage on observation sets between
  336. # sizes 4 and 15 (step size 3) with negative indices (right).
  337. for i in range(4, 15, 3):
  338. y = np.random.rand(i*(i-1)//2)
  339. Z = linkage(y)
  340. Z[i//2,1] = -2
  341. assert_(is_valid_linkage(Z) == False)
  342. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  343. def test_is_valid_linkage_4_and_up_neg_dist(self):
  344. # Tests is_valid_linkage(Z) on linkage on observation sets between
  345. # sizes 4 and 15 (step size 3) with negative distances.
  346. for i in range(4, 15, 3):
  347. y = np.random.rand(i*(i-1)//2)
  348. Z = linkage(y)
  349. Z[i//2,2] = -0.5
  350. assert_(is_valid_linkage(Z) == False)
  351. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  352. def test_is_valid_linkage_4_and_up_neg_counts(self):
  353. # Tests is_valid_linkage(Z) on linkage on observation sets between
  354. # sizes 4 and 15 (step size 3) with negative counts.
  355. for i in range(4, 15, 3):
  356. y = np.random.rand(i*(i-1)//2)
  357. Z = linkage(y)
  358. Z[i//2,3] = -2
  359. assert_(is_valid_linkage(Z) == False)
  360. assert_raises(ValueError, is_valid_linkage, Z, throw=True)
  361. class TestIsValidInconsistent:
  362. def test_is_valid_im_int_type(self):
  363. # Tests is_valid_im(R) with integer type.
  364. R = np.asarray([[0, 1, 3.0, 2],
  365. [3, 2, 4.0, 3]], dtype=int)
  366. assert_(is_valid_im(R) == False)
  367. assert_raises(TypeError, is_valid_im, R, throw=True)
  368. def test_is_valid_im_various_size(self):
  369. for nrow, ncol, valid in [(2, 5, False), (2, 3, False),
  370. (1, 4, True), (2, 4, True)]:
  371. self.check_is_valid_im_various_size(nrow, ncol, valid)
  372. def check_is_valid_im_various_size(self, nrow, ncol, valid):
  373. # Tests is_valid_im(R) with linkage matrics of various sizes
  374. R = np.asarray([[0, 1, 3.0, 2, 5],
  375. [3, 2, 4.0, 3, 3]], dtype=np.double)
  376. R = R[:nrow, :ncol]
  377. assert_(is_valid_im(R) == valid)
  378. if not valid:
  379. assert_raises(ValueError, is_valid_im, R, throw=True)
  380. def test_is_valid_im_empty(self):
  381. # Tests is_valid_im(R) with empty inconsistency matrix.
  382. R = np.zeros((0, 4), dtype=np.double)
  383. assert_(is_valid_im(R) == False)
  384. assert_raises(ValueError, is_valid_im, R, throw=True)
  385. def test_is_valid_im_4_and_up(self):
  386. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  387. # (step size 3).
  388. for i in range(4, 15, 3):
  389. y = np.random.rand(i*(i-1)//2)
  390. Z = linkage(y)
  391. R = inconsistent(Z)
  392. assert_(is_valid_im(R) == True)
  393. def test_is_valid_im_4_and_up_neg_index_left(self):
  394. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  395. # (step size 3) with negative link height means.
  396. for i in range(4, 15, 3):
  397. y = np.random.rand(i*(i-1)//2)
  398. Z = linkage(y)
  399. R = inconsistent(Z)
  400. R[i//2,0] = -2.0
  401. assert_(is_valid_im(R) == False)
  402. assert_raises(ValueError, is_valid_im, R, throw=True)
  403. def test_is_valid_im_4_and_up_neg_index_right(self):
  404. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  405. # (step size 3) with negative link height standard deviations.
  406. for i in range(4, 15, 3):
  407. y = np.random.rand(i*(i-1)//2)
  408. Z = linkage(y)
  409. R = inconsistent(Z)
  410. R[i//2,1] = -2.0
  411. assert_(is_valid_im(R) == False)
  412. assert_raises(ValueError, is_valid_im, R, throw=True)
  413. def test_is_valid_im_4_and_up_neg_dist(self):
  414. # Tests is_valid_im(R) on im on observation sets between sizes 4 and 15
  415. # (step size 3) with negative link counts.
  416. for i in range(4, 15, 3):
  417. y = np.random.rand(i*(i-1)//2)
  418. Z = linkage(y)
  419. R = inconsistent(Z)
  420. R[i//2,2] = -0.5
  421. assert_(is_valid_im(R) == False)
  422. assert_raises(ValueError, is_valid_im, R, throw=True)
  423. class TestNumObsLinkage:
  424. def test_num_obs_linkage_empty(self):
  425. # Tests num_obs_linkage(Z) with empty linkage.
  426. Z = np.zeros((0, 4), dtype=np.double)
  427. assert_raises(ValueError, num_obs_linkage, Z)
  428. def test_num_obs_linkage_1x4(self):
  429. # Tests num_obs_linkage(Z) on linkage over 2 observations.
  430. Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
  431. assert_equal(num_obs_linkage(Z), 2)
  432. def test_num_obs_linkage_2x4(self):
  433. # Tests num_obs_linkage(Z) on linkage over 3 observations.
  434. Z = np.asarray([[0, 1, 3.0, 2],
  435. [3, 2, 4.0, 3]], dtype=np.double)
  436. assert_equal(num_obs_linkage(Z), 3)
  437. def test_num_obs_linkage_4_and_up(self):
  438. # Tests num_obs_linkage(Z) on linkage on observation sets between sizes
  439. # 4 and 15 (step size 3).
  440. for i in range(4, 15, 3):
  441. y = np.random.rand(i*(i-1)//2)
  442. Z = linkage(y)
  443. assert_equal(num_obs_linkage(Z), i)
  444. class TestLeavesList:
  445. def test_leaves_list_1x4(self):
  446. # Tests leaves_list(Z) on a 1x4 linkage.
  447. Z = np.asarray([[0, 1, 3.0, 2]], dtype=np.double)
  448. to_tree(Z)
  449. assert_equal(leaves_list(Z), [0, 1])
  450. def test_leaves_list_2x4(self):
  451. # Tests leaves_list(Z) on a 2x4 linkage.
  452. Z = np.asarray([[0, 1, 3.0, 2],
  453. [3, 2, 4.0, 3]], dtype=np.double)
  454. to_tree(Z)
  455. assert_equal(leaves_list(Z), [0, 1, 2])
  456. def test_leaves_list_Q(self):
  457. for method in ['single', 'complete', 'average', 'weighted', 'centroid',
  458. 'median', 'ward']:
  459. self.check_leaves_list_Q(method)
  460. def check_leaves_list_Q(self, method):
  461. # Tests leaves_list(Z) on the Q data set
  462. X = hierarchy_test_data.Q_X
  463. Z = linkage(X, method)
  464. node = to_tree(Z)
  465. assert_equal(node.pre_order(), leaves_list(Z))
  466. def test_Q_subtree_pre_order(self):
  467. # Tests that pre_order() works when called on sub-trees.
  468. X = hierarchy_test_data.Q_X
  469. Z = linkage(X, 'single')
  470. node = to_tree(Z)
  471. assert_equal(node.pre_order(), (node.get_left().pre_order()
  472. + node.get_right().pre_order()))
  473. class TestCorrespond:
  474. def test_correspond_empty(self):
  475. # Tests correspond(Z, y) with empty linkage and condensed distance matrix.
  476. y = np.zeros((0,))
  477. Z = np.zeros((0,4))
  478. assert_raises(ValueError, correspond, Z, y)
  479. def test_correspond_2_and_up(self):
  480. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  481. # different sizes.
  482. for i in range(2, 4):
  483. y = np.random.rand(i*(i-1)//2)
  484. Z = linkage(y)
  485. assert_(correspond(Z, y))
  486. for i in range(4, 15, 3):
  487. y = np.random.rand(i*(i-1)//2)
  488. Z = linkage(y)
  489. assert_(correspond(Z, y))
  490. def test_correspond_4_and_up(self):
  491. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  492. # different sizes. Correspondence should be false.
  493. for (i, j) in (list(zip(list(range(2, 4)), list(range(3, 5)))) +
  494. list(zip(list(range(3, 5)), list(range(2, 4))))):
  495. y = np.random.rand(i*(i-1)//2)
  496. y2 = np.random.rand(j*(j-1)//2)
  497. Z = linkage(y)
  498. Z2 = linkage(y2)
  499. assert_equal(correspond(Z, y2), False)
  500. assert_equal(correspond(Z2, y), False)
  501. def test_correspond_4_and_up_2(self):
  502. # Tests correspond(Z, y) on linkage and CDMs over observation sets of
  503. # different sizes. Correspondence should be false.
  504. for (i, j) in (list(zip(list(range(2, 7)), list(range(16, 21)))) +
  505. list(zip(list(range(2, 7)), list(range(16, 21))))):
  506. y = np.random.rand(i*(i-1)//2)
  507. y2 = np.random.rand(j*(j-1)//2)
  508. Z = linkage(y)
  509. Z2 = linkage(y2)
  510. assert_equal(correspond(Z, y2), False)
  511. assert_equal(correspond(Z2, y), False)
  512. def test_num_obs_linkage_multi_matrix(self):
  513. # Tests num_obs_linkage with observation matrices of multiple sizes.
  514. for n in range(2, 10):
  515. X = np.random.rand(n, 4)
  516. Y = pdist(X)
  517. Z = linkage(Y)
  518. assert_equal(num_obs_linkage(Z), n)
  519. class TestIsMonotonic:
  520. def test_is_monotonic_empty(self):
  521. # Tests is_monotonic(Z) on an empty linkage.
  522. Z = np.zeros((0, 4))
  523. assert_raises(ValueError, is_monotonic, Z)
  524. def test_is_monotonic_1x4(self):
  525. # Tests is_monotonic(Z) on 1x4 linkage. Expecting True.
  526. Z = np.asarray([[0, 1, 0.3, 2]], dtype=np.double)
  527. assert_equal(is_monotonic(Z), True)
  528. def test_is_monotonic_2x4_T(self):
  529. # Tests is_monotonic(Z) on 2x4 linkage. Expecting True.
  530. Z = np.asarray([[0, 1, 0.3, 2],
  531. [2, 3, 0.4, 3]], dtype=np.double)
  532. assert_equal(is_monotonic(Z), True)
  533. def test_is_monotonic_2x4_F(self):
  534. # Tests is_monotonic(Z) on 2x4 linkage. Expecting False.
  535. Z = np.asarray([[0, 1, 0.4, 2],
  536. [2, 3, 0.3, 3]], dtype=np.double)
  537. assert_equal(is_monotonic(Z), False)
  538. def test_is_monotonic_3x4_T(self):
  539. # Tests is_monotonic(Z) on 3x4 linkage. Expecting True.
  540. Z = np.asarray([[0, 1, 0.3, 2],
  541. [2, 3, 0.4, 2],
  542. [4, 5, 0.6, 4]], dtype=np.double)
  543. assert_equal(is_monotonic(Z), True)
  544. def test_is_monotonic_3x4_F1(self):
  545. # Tests is_monotonic(Z) on 3x4 linkage (case 1). Expecting False.
  546. Z = np.asarray([[0, 1, 0.3, 2],
  547. [2, 3, 0.2, 2],
  548. [4, 5, 0.6, 4]], dtype=np.double)
  549. assert_equal(is_monotonic(Z), False)
  550. def test_is_monotonic_3x4_F2(self):
  551. # Tests is_monotonic(Z) on 3x4 linkage (case 2). Expecting False.
  552. Z = np.asarray([[0, 1, 0.8, 2],
  553. [2, 3, 0.4, 2],
  554. [4, 5, 0.6, 4]], dtype=np.double)
  555. assert_equal(is_monotonic(Z), False)
  556. def test_is_monotonic_3x4_F3(self):
  557. # Tests is_monotonic(Z) on 3x4 linkage (case 3). Expecting False
  558. Z = np.asarray([[0, 1, 0.3, 2],
  559. [2, 3, 0.4, 2],
  560. [4, 5, 0.2, 4]], dtype=np.double)
  561. assert_equal(is_monotonic(Z), False)
  562. def test_is_monotonic_tdist_linkage1(self):
  563. # Tests is_monotonic(Z) on clustering generated by single linkage on
  564. # tdist data set. Expecting True.
  565. Z = linkage(hierarchy_test_data.ytdist, 'single')
  566. assert_equal(is_monotonic(Z), True)
  567. def test_is_monotonic_tdist_linkage2(self):
  568. # Tests is_monotonic(Z) on clustering generated by single linkage on
  569. # tdist data set. Perturbing. Expecting False.
  570. Z = linkage(hierarchy_test_data.ytdist, 'single')
  571. Z[2,2] = 0.0
  572. assert_equal(is_monotonic(Z), False)
  573. def test_is_monotonic_Q_linkage(self):
  574. # Tests is_monotonic(Z) on clustering generated by single linkage on
  575. # Q data set. Expecting True.
  576. X = hierarchy_test_data.Q_X
  577. Z = linkage(X, 'single')
  578. assert_equal(is_monotonic(Z), True)
  579. class TestMaxDists:
  580. def test_maxdists_empty_linkage(self):
  581. # Tests maxdists(Z) on empty linkage. Expecting exception.
  582. Z = np.zeros((0, 4), dtype=np.double)
  583. assert_raises(ValueError, maxdists, Z)
  584. def test_maxdists_one_cluster_linkage(self):
  585. # Tests maxdists(Z) on linkage with one cluster.
  586. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  587. MD = maxdists(Z)
  588. expectedMD = calculate_maximum_distances(Z)
  589. assert_allclose(MD, expectedMD, atol=1e-15)
  590. def test_maxdists_Q_linkage(self):
  591. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  592. self.check_maxdists_Q_linkage(method)
  593. def check_maxdists_Q_linkage(self, method):
  594. # Tests maxdists(Z) on the Q data set
  595. X = hierarchy_test_data.Q_X
  596. Z = linkage(X, method)
  597. MD = maxdists(Z)
  598. expectedMD = calculate_maximum_distances(Z)
  599. assert_allclose(MD, expectedMD, atol=1e-15)
  600. class TestMaxInconsts:
  601. def test_maxinconsts_empty_linkage(self):
  602. # Tests maxinconsts(Z, R) on empty linkage. Expecting exception.
  603. Z = np.zeros((0, 4), dtype=np.double)
  604. R = np.zeros((0, 4), dtype=np.double)
  605. assert_raises(ValueError, maxinconsts, Z, R)
  606. def test_maxinconsts_difrow_linkage(self):
  607. # Tests maxinconsts(Z, R) on linkage and inconsistency matrices with
  608. # different numbers of clusters. Expecting exception.
  609. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  610. R = np.random.rand(2, 4)
  611. assert_raises(ValueError, maxinconsts, Z, R)
  612. def test_maxinconsts_one_cluster_linkage(self):
  613. # Tests maxinconsts(Z, R) on linkage with one cluster.
  614. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  615. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  616. MD = maxinconsts(Z, R)
  617. expectedMD = calculate_maximum_inconsistencies(Z, R)
  618. assert_allclose(MD, expectedMD, atol=1e-15)
  619. def test_maxinconsts_Q_linkage(self):
  620. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  621. self.check_maxinconsts_Q_linkage(method)
  622. def check_maxinconsts_Q_linkage(self, method):
  623. # Tests maxinconsts(Z, R) on the Q data set
  624. X = hierarchy_test_data.Q_X
  625. Z = linkage(X, method)
  626. R = inconsistent(Z)
  627. MD = maxinconsts(Z, R)
  628. expectedMD = calculate_maximum_inconsistencies(Z, R)
  629. assert_allclose(MD, expectedMD, atol=1e-15)
  630. class TestMaxRStat:
  631. def test_maxRstat_invalid_index(self):
  632. for i in [3.3, -1, 4]:
  633. self.check_maxRstat_invalid_index(i)
  634. def check_maxRstat_invalid_index(self, i):
  635. # Tests maxRstat(Z, R, i). Expecting exception.
  636. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  637. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  638. if isinstance(i, int):
  639. assert_raises(ValueError, maxRstat, Z, R, i)
  640. else:
  641. assert_raises(TypeError, maxRstat, Z, R, i)
  642. def test_maxRstat_empty_linkage(self):
  643. for i in range(4):
  644. self.check_maxRstat_empty_linkage(i)
  645. def check_maxRstat_empty_linkage(self, i):
  646. # Tests maxRstat(Z, R, i) on empty linkage. Expecting exception.
  647. Z = np.zeros((0, 4), dtype=np.double)
  648. R = np.zeros((0, 4), dtype=np.double)
  649. assert_raises(ValueError, maxRstat, Z, R, i)
  650. def test_maxRstat_difrow_linkage(self):
  651. for i in range(4):
  652. self.check_maxRstat_difrow_linkage(i)
  653. def check_maxRstat_difrow_linkage(self, i):
  654. # Tests maxRstat(Z, R, i) on linkage and inconsistency matrices with
  655. # different numbers of clusters. Expecting exception.
  656. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  657. R = np.random.rand(2, 4)
  658. assert_raises(ValueError, maxRstat, Z, R, i)
  659. def test_maxRstat_one_cluster_linkage(self):
  660. for i in range(4):
  661. self.check_maxRstat_one_cluster_linkage(i)
  662. def check_maxRstat_one_cluster_linkage(self, i):
  663. # Tests maxRstat(Z, R, i) on linkage with one cluster.
  664. Z = np.asarray([[0, 1, 0.3, 4]], dtype=np.double)
  665. R = np.asarray([[0, 0, 0, 0.3]], dtype=np.double)
  666. MD = maxRstat(Z, R, 1)
  667. expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
  668. assert_allclose(MD, expectedMD, atol=1e-15)
  669. def test_maxRstat_Q_linkage(self):
  670. for method in ['single', 'complete', 'ward', 'centroid', 'median']:
  671. for i in range(4):
  672. self.check_maxRstat_Q_linkage(method, i)
  673. def check_maxRstat_Q_linkage(self, method, i):
  674. # Tests maxRstat(Z, R, i) on the Q data set
  675. X = hierarchy_test_data.Q_X
  676. Z = linkage(X, method)
  677. R = inconsistent(Z)
  678. MD = maxRstat(Z, R, 1)
  679. expectedMD = calculate_maximum_inconsistencies(Z, R, 1)
  680. assert_allclose(MD, expectedMD, atol=1e-15)
  681. class TestDendrogram:
  682. def test_dendrogram_single_linkage_tdist(self):
  683. # Tests dendrogram calculation on single linkage of the tdist data set.
  684. Z = linkage(hierarchy_test_data.ytdist, 'single')
  685. R = dendrogram(Z, no_plot=True)
  686. leaves = R["leaves"]
  687. assert_equal(leaves, [2, 5, 1, 0, 3, 4])
  688. def test_valid_orientation(self):
  689. Z = linkage(hierarchy_test_data.ytdist, 'single')
  690. assert_raises(ValueError, dendrogram, Z, orientation="foo")
  691. def test_labels_as_array_or_list(self):
  692. # test for gh-12418
  693. Z = linkage(hierarchy_test_data.ytdist, 'single')
  694. labels = np.array([1, 3, 2, 6, 4, 5])
  695. result1 = dendrogram(Z, labels=labels, no_plot=True)
  696. result2 = dendrogram(Z, labels=labels.tolist(), no_plot=True)
  697. assert result1 == result2
  698. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  699. def test_valid_label_size(self):
  700. link = np.array([
  701. [0, 1, 1.0, 4],
  702. [2, 3, 1.0, 5],
  703. [4, 5, 2.0, 6],
  704. ])
  705. plt.figure()
  706. with pytest.raises(ValueError) as exc_info:
  707. dendrogram(link, labels=list(range(100)))
  708. assert "Dimensions of Z and labels must be consistent."\
  709. in str(exc_info.value)
  710. with pytest.raises(
  711. ValueError,
  712. match="Dimensions of Z and labels must be consistent."):
  713. dendrogram(link, labels=[])
  714. plt.close()
  715. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  716. def test_dendrogram_plot(self):
  717. for orientation in ['top', 'bottom', 'left', 'right']:
  718. self.check_dendrogram_plot(orientation)
  719. def check_dendrogram_plot(self, orientation):
  720. # Tests dendrogram plotting.
  721. Z = linkage(hierarchy_test_data.ytdist, 'single')
  722. expected = {'color_list': ['C1', 'C0', 'C0', 'C0', 'C0'],
  723. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  724. [0.0, 219.0, 219.0, 0.0],
  725. [0.0, 255.0, 255.0, 219.0],
  726. [0.0, 268.0, 268.0, 255.0],
  727. [138.0, 295.0, 295.0, 268.0]],
  728. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  729. [45.0, 45.0, 55.0, 55.0],
  730. [35.0, 35.0, 50.0, 50.0],
  731. [25.0, 25.0, 42.5, 42.5],
  732. [10.0, 10.0, 33.75, 33.75]],
  733. 'ivl': ['2', '5', '1', '0', '3', '4'],
  734. 'leaves': [2, 5, 1, 0, 3, 4],
  735. 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0', 'C0'],
  736. }
  737. fig = plt.figure()
  738. ax = fig.add_subplot(221)
  739. # test that dendrogram accepts ax keyword
  740. R1 = dendrogram(Z, ax=ax, orientation=orientation)
  741. assert_equal(R1, expected)
  742. # test that dendrogram accepts and handle the leaf_font_size and
  743. # leaf_rotation keywords
  744. dendrogram(Z, ax=ax, orientation=orientation,
  745. leaf_font_size=20, leaf_rotation=90)
  746. testlabel = (
  747. ax.get_xticklabels()[0]
  748. if orientation in ['top', 'bottom']
  749. else ax.get_yticklabels()[0]
  750. )
  751. assert_equal(testlabel.get_rotation(), 90)
  752. assert_equal(testlabel.get_size(), 20)
  753. dendrogram(Z, ax=ax, orientation=orientation,
  754. leaf_rotation=90)
  755. testlabel = (
  756. ax.get_xticklabels()[0]
  757. if orientation in ['top', 'bottom']
  758. else ax.get_yticklabels()[0]
  759. )
  760. assert_equal(testlabel.get_rotation(), 90)
  761. dendrogram(Z, ax=ax, orientation=orientation,
  762. leaf_font_size=20)
  763. testlabel = (
  764. ax.get_xticklabels()[0]
  765. if orientation in ['top', 'bottom']
  766. else ax.get_yticklabels()[0]
  767. )
  768. assert_equal(testlabel.get_size(), 20)
  769. plt.close()
  770. # test plotting to gca (will import pylab)
  771. R2 = dendrogram(Z, orientation=orientation)
  772. plt.close()
  773. assert_equal(R2, expected)
  774. @pytest.mark.skipif(not have_matplotlib, reason="no matplotlib")
  775. def test_dendrogram_truncate_mode(self):
  776. Z = linkage(hierarchy_test_data.ytdist, 'single')
  777. R = dendrogram(Z, 2, 'lastp', show_contracted=True)
  778. plt.close()
  779. assert_equal(R, {'color_list': ['C0'],
  780. 'dcoord': [[0.0, 295.0, 295.0, 0.0]],
  781. 'icoord': [[5.0, 5.0, 15.0, 15.0]],
  782. 'ivl': ['(2)', '(4)'],
  783. 'leaves': [6, 9],
  784. 'leaves_color_list': ['C0', 'C0'],
  785. })
  786. R = dendrogram(Z, 2, 'mtica', show_contracted=True)
  787. plt.close()
  788. assert_equal(R, {'color_list': ['C1', 'C0', 'C0', 'C0'],
  789. 'dcoord': [[0.0, 138.0, 138.0, 0.0],
  790. [0.0, 255.0, 255.0, 0.0],
  791. [0.0, 268.0, 268.0, 255.0],
  792. [138.0, 295.0, 295.0, 268.0]],
  793. 'icoord': [[5.0, 5.0, 15.0, 15.0],
  794. [35.0, 35.0, 45.0, 45.0],
  795. [25.0, 25.0, 40.0, 40.0],
  796. [10.0, 10.0, 32.5, 32.5]],
  797. 'ivl': ['2', '5', '1', '0', '(2)'],
  798. 'leaves': [2, 5, 1, 0, 7],
  799. 'leaves_color_list': ['C1', 'C1', 'C0', 'C0', 'C0'],
  800. })
  801. def test_dendrogram_colors(self):
  802. # Tests dendrogram plots with alternate colors
  803. Z = linkage(hierarchy_test_data.ytdist, 'single')
  804. set_link_color_palette(['c', 'm', 'y', 'k'])
  805. R = dendrogram(Z, no_plot=True,
  806. above_threshold_color='g', color_threshold=250)
  807. set_link_color_palette(['g', 'r', 'c', 'm', 'y', 'k'])
  808. color_list = R['color_list']
  809. assert_equal(color_list, ['c', 'm', 'g', 'g', 'g'])
  810. # reset color palette (global list)
  811. set_link_color_palette(None)
  812. def test_dendrogram_leaf_colors_zero_dist(self):
  813. # tests that the colors of leafs are correct for tree
  814. # with two identical points
  815. x = np.array([[1, 0, 0],
  816. [0, 0, 1],
  817. [0, 2, 0],
  818. [0, 0, 1],
  819. [0, 1, 0],
  820. [0, 1, 0]])
  821. z = linkage(x, "single")
  822. d = dendrogram(z, no_plot=True)
  823. exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2']
  824. colors = d["leaves_color_list"]
  825. assert_equal(colors, exp_colors)
  826. def test_dendrogram_leaf_colors(self):
  827. # tests that the colors are correct for a tree
  828. # with two near points ((0, 0, 1.1) and (0, 0, 1))
  829. x = np.array([[1, 0, 0],
  830. [0, 0, 1.1],
  831. [0, 2, 0],
  832. [0, 0, 1],
  833. [0, 1, 0],
  834. [0, 1, 0]])
  835. z = linkage(x, "single")
  836. d = dendrogram(z, no_plot=True)
  837. exp_colors = ['C0', 'C1', 'C1', 'C0', 'C2', 'C2']
  838. colors = d["leaves_color_list"]
  839. assert_equal(colors, exp_colors)
  840. def calculate_maximum_distances(Z):
  841. # Used for testing correctness of maxdists.
  842. n = Z.shape[0] + 1
  843. B = np.zeros((n-1,))
  844. q = np.zeros((3,))
  845. for i in range(0, n - 1):
  846. q[:] = 0.0
  847. left = Z[i, 0]
  848. right = Z[i, 1]
  849. if left >= n:
  850. q[0] = B[int(left) - n]
  851. if right >= n:
  852. q[1] = B[int(right) - n]
  853. q[2] = Z[i, 2]
  854. B[i] = q.max()
  855. return B
  856. def calculate_maximum_inconsistencies(Z, R, k=3):
  857. # Used for testing correctness of maxinconsts.
  858. n = Z.shape[0] + 1
  859. B = np.zeros((n-1,))
  860. q = np.zeros((3,))
  861. for i in range(0, n - 1):
  862. q[:] = 0.0
  863. left = Z[i, 0]
  864. right = Z[i, 1]
  865. if left >= n:
  866. q[0] = B[int(left) - n]
  867. if right >= n:
  868. q[1] = B[int(right) - n]
  869. q[2] = R[i, k]
  870. B[i] = q.max()
  871. return B
  872. def within_tol(a, b, tol):
  873. return np.abs(a - b).max() < tol
  874. def test_unsupported_uncondensed_distance_matrix_linkage_warning():
  875. assert_warns(ClusterWarning, linkage, [[0, 1], [1, 0]])
  876. def test_euclidean_linkage_value_error():
  877. for method in scipy.cluster.hierarchy._EUCLIDEAN_METHODS:
  878. assert_raises(ValueError, linkage, [[1, 1], [1, 1]],
  879. method=method, metric='cityblock')
  880. def test_2x2_linkage():
  881. Z1 = linkage([1], method='single', metric='euclidean')
  882. Z2 = linkage([[0, 1], [0, 0]], method='single', metric='euclidean')
  883. assert_allclose(Z1, Z2)
  884. def test_node_compare():
  885. np.random.seed(23)
  886. nobs = 50
  887. X = np.random.randn(nobs, 4)
  888. Z = scipy.cluster.hierarchy.ward(X)
  889. tree = to_tree(Z)
  890. assert_(tree > tree.get_left())
  891. assert_(tree.get_right() > tree.get_left())
  892. assert_(tree.get_right() == tree.get_right())
  893. assert_(tree.get_right() != tree.get_left())
  894. def test_cut_tree():
  895. np.random.seed(23)
  896. nobs = 50
  897. X = np.random.randn(nobs, 4)
  898. Z = scipy.cluster.hierarchy.ward(X)
  899. cutree = cut_tree(Z)
  900. assert_equal(cutree[:, 0], np.arange(nobs))
  901. assert_equal(cutree[:, -1], np.zeros(nobs))
  902. assert_equal(cutree.max(0), np.arange(nobs - 1, -1, -1))
  903. assert_equal(cutree[:, [-5]], cut_tree(Z, n_clusters=5))
  904. assert_equal(cutree[:, [-5, -10]], cut_tree(Z, n_clusters=[5, 10]))
  905. assert_equal(cutree[:, [-10, -5]], cut_tree(Z, n_clusters=[10, 5]))
  906. nodes = _order_cluster_tree(Z)
  907. heights = np.array([node.dist for node in nodes])
  908. assert_equal(cutree[:, np.searchsorted(heights, [5])],
  909. cut_tree(Z, height=5))
  910. assert_equal(cutree[:, np.searchsorted(heights, [5, 10])],
  911. cut_tree(Z, height=[5, 10]))
  912. assert_equal(cutree[:, np.searchsorted(heights, [10, 5])],
  913. cut_tree(Z, height=[10, 5]))
  914. def test_optimal_leaf_ordering():
  915. # test with the distance vector y
  916. Z = optimal_leaf_ordering(linkage(hierarchy_test_data.ytdist),
  917. hierarchy_test_data.ytdist)
  918. expectedZ = hierarchy_test_data.linkage_ytdist_single_olo
  919. assert_allclose(Z, expectedZ, atol=1e-10)
  920. # test with the observation matrix X
  921. Z = optimal_leaf_ordering(linkage(hierarchy_test_data.X, 'ward'),
  922. hierarchy_test_data.X)
  923. expectedZ = hierarchy_test_data.linkage_X_ward_olo
  924. assert_allclose(Z, expectedZ, atol=1e-06)
  925. def test_Heap():
  926. values = np.array([2, -1, 0, -1.5, 3])
  927. heap = Heap(values)
  928. pair = heap.get_min()
  929. assert_equal(pair['key'], 3)
  930. assert_equal(pair['value'], -1.5)
  931. heap.remove_min()
  932. pair = heap.get_min()
  933. assert_equal(pair['key'], 1)
  934. assert_equal(pair['value'], -1)
  935. heap.change_value(1, 2.5)
  936. pair = heap.get_min()
  937. assert_equal(pair['key'], 2)
  938. assert_equal(pair['value'], 0)
  939. heap.remove_min()
  940. heap.remove_min()
  941. heap.change_value(1, 10)
  942. pair = heap.get_min()
  943. assert_equal(pair['key'], 4)
  944. assert_equal(pair['value'], 3)
  945. heap.remove_min()
  946. pair = heap.get_min()
  947. assert_equal(pair['key'], 1)
  948. assert_equal(pair['value'], 10)