test_distance.py 82 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186
  1. #
  2. # Author: Damian Eads
  3. # Date: April 17, 2008
  4. #
  5. # Copyright (C) 2008 Damian Eads
  6. #
  7. # Redistribution and use in source and binary forms, with or without
  8. # modification, are permitted provided that the following conditions
  9. # are met:
  10. #
  11. # 1. Redistributions of source code must retain the above copyright
  12. # notice, this list of conditions and the following disclaimer.
  13. #
  14. # 2. Redistributions in binary form must reproduce the above
  15. # copyright notice, this list of conditions and the following
  16. # disclaimer in the documentation and/or other materials provided
  17. # with the distribution.
  18. #
  19. # 3. The name of the author may not be used to endorse or promote
  20. # products derived from this software without specific prior
  21. # written permission.
  22. #
  23. # THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
  24. # OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  25. # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  26. # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
  27. # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  28. # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
  29. # GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  30. # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  31. # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  32. # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  33. # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  34. import sys
  35. import os.path
  36. from functools import wraps, partial
  37. import weakref
  38. import numpy as np
  39. import warnings
  40. from numpy.linalg import norm
  41. from numpy.testing import (verbose, assert_,
  42. assert_array_equal, assert_equal,
  43. assert_almost_equal, assert_allclose,
  44. break_cycles, IS_PYPY)
  45. import pytest
  46. from pytest import raises as assert_raises
  47. from scipy.spatial.distance import (
  48. squareform, pdist, cdist, num_obs_y, num_obs_dm, is_valid_dm, is_valid_y,
  49. _validate_vector, _METRICS_NAMES)
  50. # these were missing: chebyshev cityblock kulsinski
  51. # jensenshannon and seuclidean are referenced by string name.
  52. from scipy.spatial.distance import (braycurtis, canberra, chebyshev, cityblock,
  53. correlation, cosine, dice, euclidean,
  54. hamming, jaccard, jensenshannon,
  55. kulsinski, kulczynski1, mahalanobis,
  56. minkowski, rogerstanimoto,
  57. russellrao, seuclidean, sokalmichener,
  58. sokalsneath, sqeuclidean, yule)
# Names of the fixture text files that load_testing_files() reads from the
# ``data`` directory next to this module. Each file is stored in ``eo`` under
# its name with ".txt" and "-ml" removed.
_filenames = [
    "cdist-X1.txt",
    "cdist-X2.txt",
    "iris.txt",
    "pdist-boolean-inp.txt",
    "pdist-chebyshev-ml-iris.txt",
    "pdist-chebyshev-ml.txt",
    "pdist-cityblock-ml-iris.txt",
    "pdist-cityblock-ml.txt",
    "pdist-correlation-ml-iris.txt",
    "pdist-correlation-ml.txt",
    "pdist-cosine-ml-iris.txt",
    "pdist-cosine-ml.txt",
    "pdist-double-inp.txt",
    "pdist-euclidean-ml-iris.txt",
    "pdist-euclidean-ml.txt",
    "pdist-hamming-ml.txt",
    "pdist-jaccard-ml.txt",
    "pdist-jensenshannon-ml-iris.txt",
    "pdist-jensenshannon-ml.txt",
    "pdist-minkowski-3.2-ml-iris.txt",
    "pdist-minkowski-3.2-ml.txt",
    "pdist-minkowski-5.8-ml-iris.txt",
    "pdist-seuclidean-ml-iris.txt",
    "pdist-seuclidean-ml.txt",
    "pdist-spearman-ml.txt",
    "random-bool-data.txt",
    "random-double-data.txt",
    "random-int-data.txt",
    "random-uint-data.txt",
]

# Hand-written symmetric 6x6 matrix with a zero diagonal, stored as doubles.
_tdist = np.array([[0, 662, 877, 255, 412, 996],
                   [662, 0, 295, 468, 268, 400],
                   [877, 295, 0, 754, 564, 138],
                   [255, 468, 754, 0, 219, 869],
                   [412, 268, 564, 219, 0, 669],
                   [996, 400, 138, 869, 669, 0]], dtype='double')

# The same distances after conversion by `squareform`.
_ytdist = squareform(_tdist)

# A hashmap of expected output arrays for the tests. These arrays
# come from a list of text files, which are read prior to testing.
# Each test loads inputs and outputs from this dictionary.
eo = {}
  101. def load_testing_files():
  102. for fn in _filenames:
  103. name = fn.replace(".txt", "").replace("-ml", "")
  104. fqfn = os.path.join(os.path.dirname(__file__), 'data', fn)
  105. fp = open(fqfn)
  106. eo[name] = np.loadtxt(fp)
  107. fp.close()
  108. eo['pdist-boolean-inp'] = np.bool_(eo['pdist-boolean-inp'])
  109. eo['random-bool-data'] = np.bool_(eo['random-bool-data'])
  110. eo['random-float32-data'] = np.float32(eo['random-double-data'])
  111. eo['random-int-data'] = np.int_(eo['random-int-data'])
  112. eo['random-uint-data'] = np.uint(eo['random-uint-data'])
# Fill `eo` once, at import time, so it is populated before any test runs.
load_testing_files()
  114. def _is_32bit():
  115. return np.intp(0).itemsize < 8
  116. def _chk_asarrays(arrays, axis=None):
  117. arrays = [np.asanyarray(a) for a in arrays]
  118. if axis is None:
  119. # np < 1.10 ravel removes subclass from arrays
  120. arrays = [np.ravel(a) if a.ndim != 1 else a
  121. for a in arrays]
  122. axis = 0
  123. arrays = tuple(np.atleast_1d(a) for a in arrays)
  124. if axis < 0:
  125. if not all(a.ndim == arrays[0].ndim for a in arrays):
  126. raise ValueError("array ndim must be the same for neg axis")
  127. axis = range(arrays[0].ndim)[axis]
  128. return arrays + (axis,)
def _chk_weights(arrays, weights=None, axis=None,
                 force_weights=False, simplify_weights=True,
                 pos_only=False, neg_check=False,
                 nan_screen=False, mask_screen=False,
                 ddof=None):
    """Normalize data arrays together with an optional weight vector.

    Parameters
    ----------
    arrays : iterable of array_like
        Data arrays; converted by `_chk_asarrays`.
    weights : array_like or None, optional
        One weight per observation along `axis`.
    axis : int or None, optional
        Observation axis, resolved by `_chk_asarrays`.
    force_weights : bool, optional
        When True and `weights` is None, an all-ones weight vector is
        created. Passing True also disables `simplify_weights`.
    simplify_weights : bool, optional
        When in effect, weights that are all equal to 1 are returned as None.
    pos_only : bool, optional
        Drop every observation whose weight is not strictly positive.
    neg_check : bool, optional
        Raise if any weight is negative.
    nan_screen : bool, optional
        If any array sums to NaN, mask its invalid entries and turn on both
        `force_weights` and `mask_screen`.
    mask_screen : bool, optional
        Pass the weights through `_weight_masked`. If weights were not
        forced, they become forced when any array carries a mask.
    ddof : optional
        When truthy, the weights are passed through `_freq_weights`.

    Returns
    -------
    tuple
        The arrays, followed by the weights (possibly None) and the axis.

    Raises
    ------
    ValueError
        If the weights do not have shape ``(a.shape[axis],)`` for every
        array, or if `neg_check` is set and a weight is negative.
    """
    chked = _chk_asarrays(arrays, axis=axis)
    arrays, axis = chked[:-1], chked[-1]

    # Decided from the caller's `force_weights`, before the screening steps
    # below get a chance to switch it on.
    simplify_weights = simplify_weights and not force_weights
    if not force_weights and mask_screen:
        force_weights = any(np.ma.getmask(a) is not np.ma.nomask for a in arrays)

    if nan_screen:
        has_nans = [np.isnan(np.sum(a)) for a in arrays]
        if any(has_nans):
            mask_screen = True
            force_weights = True
            arrays = tuple(np.ma.masked_invalid(a) if has_nan else a
                           for a, has_nan in zip(arrays, has_nans))

    if weights is not None:
        weights = np.asanyarray(weights)
    elif force_weights:
        weights = np.ones(arrays[0].shape[axis])
    else:
        # No weights given and none required: nothing left to validate.
        return arrays + (weights, axis)

    if ddof:
        weights = _freq_weights(weights)

    if mask_screen:
        weights = _weight_masked(arrays, weights, axis)

    if not all(weights.shape == (a.shape[axis],) for a in arrays):
        raise ValueError("weights shape must match arrays along axis")
    if neg_check and (weights < 0).any():
        raise ValueError("weights cannot be negative")

    if pos_only:
        pos_weights = np.nonzero(weights > 0)[0]
        if pos_weights.size < weights.size:
            arrays = tuple(np.take(a, pos_weights, axis=axis) for a in arrays)
            weights = weights[pos_weights]
    if simplify_weights and (weights == 1).all():
        weights = None
    return arrays + (weights, axis)
  168. def _freq_weights(weights):
  169. if weights is None:
  170. return weights
  171. int_weights = weights.astype(int)
  172. if (weights != int_weights).any():
  173. raise ValueError("frequency (integer count-type) weights required %s" % weights)
  174. return int_weights
  175. def _weight_masked(arrays, weights, axis):
  176. if axis is None:
  177. axis = 0
  178. weights = np.asanyarray(weights)
  179. for a in arrays:
  180. axis_mask = np.ma.getmask(a)
  181. if axis_mask is np.ma.nomask:
  182. continue
  183. if a.ndim > 1:
  184. not_axes = tuple(i for i in range(a.ndim) if i != axis)
  185. axis_mask = axis_mask.any(axis=not_axes)
  186. weights *= 1 - axis_mask.astype(int)
  187. return weights
  188. def _rand_split(arrays, weights, axis, split_per, seed=None):
  189. # Coerce `arrays` to float64 if integer, to avoid nan-to-integer issues
  190. arrays = [arr.astype(np.float64) if np.issubdtype(arr.dtype, np.integer)
  191. else arr for arr in arrays]
  192. # inverse operation for stats.collapse_weights
  193. weights = np.array(weights, dtype=np.float64) # modified inplace; need a copy
  194. seeded_rand = np.random.RandomState(seed)
  195. def mytake(a, ix, axis):
  196. record = np.asanyarray(np.take(a, ix, axis=axis))
  197. return record.reshape([a.shape[i] if i != axis else 1
  198. for i in range(a.ndim)])
  199. n_obs = arrays[0].shape[axis]
  200. assert all(a.shape[axis] == n_obs for a in arrays), "data must be aligned on sample axis"
  201. for i in range(int(split_per) * n_obs):
  202. split_ix = seeded_rand.randint(n_obs + i)
  203. prev_w = weights[split_ix]
  204. q = seeded_rand.rand()
  205. weights[split_ix] = q * prev_w
  206. weights = np.append(weights, (1. - q) * prev_w)
  207. arrays = [np.append(a, mytake(a, split_ix, axis=axis),
  208. axis=axis) for a in arrays]
  209. return arrays, weights
  210. def _rough_check(a, b, compare_assert=partial(assert_allclose, atol=1e-5),
  211. key=lambda x: x, w=None):
  212. check_a = key(a)
  213. check_b = key(b)
  214. try:
  215. if np.array(check_a != check_b).any(): # try strict equality for string types
  216. compare_assert(check_a, check_b)
  217. except AttributeError: # masked array
  218. compare_assert(check_a, check_b)
  219. except (TypeError, ValueError): # nested data structure
  220. for a_i, b_i in zip(check_a, check_b):
  221. _rough_check(a_i, b_i, compare_assert=compare_assert)
# diff from test_stats:
#  n_args=2, weight_arg='w', default_axis=None
#  ma_safe = False, nan_safe = False
def _weight_checked(fn, n_args=2, default_axis=None, key=lambda x: x, weight_arg='w',
                    squeeze=True, silent=False,
                    ones_test=True, const_test=True, dup_test=True,
                    split_test=True, dud_test=True, ma_safe=False, ma_very_safe=False, nan_safe=False,
                    split_per=1.0, seed=0, compare_assert=partial(assert_allclose, atol=1e-5)):
    """Wrap `fn` so that every call also cross-checks its weight handling.

    The wrapper calls `fn` with the caller's arguments, then calls it again
    several times with altered observations and weights and compares each
    outcome with the first one through `_rough_check`. The value returned to
    the caller is always the result of the first, unmodified call.

    Parameters
    ----------
    fn : callable
        Function under test.
    n_args : int
        Number of leading positional arguments treated as data arrays; the
        remaining positional arguments are forwarded unchanged.
    default_axis : int or None
        Observation axis used when the call has no ``axis`` keyword.
    key : callable
        Forwarded to `_rough_check` to extract the value to compare.
    weight_arg : str
        Name of the keyword argument that carries the weights.
    squeeze : bool
        Squeeze each data array (then make it at least 1-D) before the
        repeated calls.
    silent : bool
        Suppress the warning issued when a repeated call raises
        ``NotImplementedError``.
    ones_test, const_test, dud_test, dup_test, split_test : bool
        Switches for the individual checks described in the body.
    ma_safe, ma_very_safe, nan_safe : bool
        Enable the extra masked-array / NaN variants of the dud check.
    split_per : float
        Forwarded to `_rand_split`; the random-split check is skipped when
        it is not positive.
    seed : int
        Forwarded to `_rand_split`.
    compare_assert : callable
        Accepted, but not referenced by the visible body.

    Returns
    -------
    callable
        The wrapping function (decorated with ``functools.wraps(fn)``).
    """
    @wraps(fn)
    def wrapped(*args, **kwargs):
        # Reference result: every later call is compared against this one.
        result = fn(*args, **kwargs)

        arrays = args[:n_args]
        rest = args[n_args:]
        weights = kwargs.get(weight_arg, None)
        axis = kwargs.get('axis', default_axis)

        # force_weights=True guarantees a concrete weight vector from here on.
        chked = _chk_weights(arrays, weights=weights, axis=axis, force_weights=True, mask_screen=True)
        arrays, weights, axis = chked[:-2], chked[-2], chked[-1]
        if squeeze:
            arrays = [np.atleast_1d(a.squeeze()) for a in arrays]

        try:
            # WEIGHTS CHECK 1: EQUAL WEIGHTED OBSERVATIONS
            args = tuple(arrays) + rest
            if ones_test:
                kwargs[weight_arg] = weights
                _rough_check(result, fn(*args, **kwargs), key=key)
            if const_test:
                # Rescaling all weights by a constant must not change the result.
                kwargs[weight_arg] = weights * 101.0
                _rough_check(result, fn(*args, **kwargs), key=key)
                kwargs[weight_arg] = weights * 0.101
                try:
                    _rough_check(result, fn(*args, **kwargs), key=key)
                except Exception as e:
                    # Re-raise the same exception type with the inputs attached.
                    raise type(e)((e, arrays, weights)) from e

            # WEIGHTS CHECK 2: ADDL 0-WEIGHTED OBS
            if dud_test:
                # add randomly resampled rows, weighted at 0
                dud_arrays, dud_weights = _rand_split(arrays, weights, axis, split_per=split_per, seed=seed)
                dud_weights[:weights.size] = weights  # not exactly 1 because of masked arrays
                dud_weights[weights.size:] = 0
                dud_args = tuple(dud_arrays) + rest
                kwargs[weight_arg] = dud_weights
                _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # increase the value of those 0-weighted rows
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * 101
                dud_args = tuple(dud_arrays) + rest
                _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # set those 0-weighted rows to NaNs
                for a in dud_arrays:
                    indexer = [slice(None)] * a.ndim
                    indexer[axis] = slice(weights.size, None)
                    indexer = tuple(indexer)
                    a[indexer] = a[indexer] * np.nan
                if kwargs.get("nan_policy", None) == "omit" and nan_safe:
                    dud_args = tuple(dud_arrays) + rest
                    _rough_check(result, fn(*dud_args, **kwargs), key=key)
                # mask out those nan values
                if ma_safe:
                    dud_arrays = [np.ma.masked_invalid(a) for a in dud_arrays]
                    dud_args = tuple(dud_arrays) + rest
                    _rough_check(result, fn(*dud_args, **kwargs), key=key)
                    if ma_very_safe:
                        # With the duds masked, dropping the weights entirely
                        # is expected to give the same answer as well.
                        kwargs[weight_arg] = None
                        _rough_check(result, fn(*dud_args, **kwargs), key=key)
                del dud_arrays, dud_args, dud_weights

            # WEIGHTS CHECK 3: DUPLICATE DATA (DUMB SPLITTING)
            if dup_test:
                # Every observation appears twice with half its weight.
                dup_arrays = [np.append(a, a, axis=axis) for a in arrays]
                dup_weights = np.append(weights, weights) / 2.0
                dup_args = tuple(dup_arrays) + rest
                kwargs[weight_arg] = dup_weights
                _rough_check(result, fn(*dup_args, **kwargs), key=key)
                del dup_args, dup_arrays, dup_weights

            # WEIGHTS CHECK 4: RANDOM SPLITTING
            if split_test and split_per > 0:
                split_arrays, split_weights = _rand_split(arrays, weights, axis, split_per=split_per, seed=seed)
                split_args = tuple(split_arrays) + rest
                kwargs[weight_arg] = split_weights
                _rough_check(result, fn(*split_args, **kwargs), key=key)
        except NotImplementedError as e:
            # when some combination of arguments makes weighting impossible,
            #  this is the desired response
            if not silent:
                warnings.warn("%s NotImplemented weights: %s" % (fn.__name__, e))
        return result
    return wrapped
# Weight-checked versions of the scipy distance functions. Calling one of
# these returns what the wrapped function returns and additionally runs the
# consistency checks performed by _weight_checked.
#
# cdist/pdist wrappers: arrays are not squeezed and the observation axis
# defaults to 1; pdist takes a single data array (n_args=1). The *_no_const
# variants skip the constant-rescaling check.
wcdist = _weight_checked(cdist, default_axis=1, squeeze=False)
wcdist_no_const = _weight_checked(cdist, default_axis=1, squeeze=False, const_test=False)
wpdist = _weight_checked(pdist, default_axis=1, squeeze=False, n_args=1)
wpdist_no_const = _weight_checked(pdist, default_axis=1, squeeze=False, const_test=False, n_args=1)
# Vector-to-vector metrics. `wmatching` and `whamming` are the same wrapper
# object (around `hamming`, with the zero-weight "dud" check disabled).
wrogerstanimoto = _weight_checked(rogerstanimoto)
wmatching = whamming = _weight_checked(hamming, dud_test=False)
wyule = _weight_checked(yule)
wdice = _weight_checked(dice)
wcityblock = _weight_checked(cityblock)
wchebyshev = _weight_checked(chebyshev)
wcosine = _weight_checked(cosine)
wcorrelation = _weight_checked(correlation)
wkulsinski = _weight_checked(kulsinski)
wkulczynski1 = _weight_checked(kulczynski1)
wjaccard = _weight_checked(jaccard)
# euclidean, sqeuclidean and canberra skip the constant-rescaling check.
weuclidean = _weight_checked(euclidean, const_test=False)
wsqeuclidean = _weight_checked(sqeuclidean, const_test=False)
wbraycurtis = _weight_checked(braycurtis)
wcanberra = _weight_checked(canberra, const_test=False)
wsokalsneath = _weight_checked(sokalsneath)
wsokalmichener = _weight_checked(sokalmichener)
wrussellrao = _weight_checked(russellrao)
  335. class TestCdist:
  336. def setup_method(self):
  337. self.rnd_eo_names = ['random-float32-data', 'random-int-data',
  338. 'random-uint-data', 'random-double-data',
  339. 'random-bool-data']
  340. self.valid_upcasts = {'bool': [np.uint, np.int_, np.float32, np.double],
  341. 'uint': [np.int_, np.float32, np.double],
  342. 'int': [np.float32, np.double],
  343. 'float32': [np.double]}
  344. def test_cdist_extra_args(self):
  345. # Tests that args and kwargs are correctly handled
  346. def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
  347. return arg + kwarg + kwarg2
  348. X1 = [[1., 2., 3.], [1.2, 2.3, 3.4], [2.2, 2.3, 4.4]]
  349. X2 = [[7., 5., 8.], [7.5, 5.8, 8.4], [5.5, 5.8, 4.4]]
  350. kwargs = {'N0tV4l1D_p4raM': 3.14, "w":np.arange(3)}
  351. args = [3.14] * 200
  352. for metric in _METRICS_NAMES:
  353. with np.testing.suppress_warnings() as sup:
  354. if metric == "kulsinski":
  355. sup.filter(DeprecationWarning,
  356. "Kulsinski has been deprecated from")
  357. assert_raises(TypeError, cdist, X1, X2,
  358. metric=metric, **kwargs)
  359. assert_raises(TypeError, cdist, X1, X2,
  360. metric=eval(metric), **kwargs)
  361. assert_raises(TypeError, cdist, X1, X2,
  362. metric="test_" + metric, **kwargs)
  363. assert_raises(TypeError, cdist, X1, X2,
  364. metric=metric, *args)
  365. assert_raises(TypeError, cdist, X1, X2,
  366. metric=eval(metric), *args)
  367. assert_raises(TypeError, cdist, X1, X2,
  368. metric="test_" + metric, *args)
  369. assert_raises(TypeError, cdist, X1, X2, _my_metric)
  370. assert_raises(TypeError, cdist, X1, X2, _my_metric, *args)
  371. assert_raises(TypeError, cdist, X1, X2, _my_metric, **kwargs)
  372. assert_raises(TypeError, cdist, X1, X2, _my_metric,
  373. kwarg=2.2, kwarg2=3.3)
  374. assert_raises(TypeError, cdist, X1, X2, _my_metric, 1, 2, kwarg=2.2)
  375. assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1, 2.2, 3.3)
  376. assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1, 2.2)
  377. assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1)
  378. assert_raises(TypeError, cdist, X1, X2, _my_metric, 1.1,
  379. kwarg=2.2, kwarg2=3.3)
  380. # this should work
  381. assert_allclose(cdist(X1, X2, metric=_my_metric,
  382. arg=1.1, kwarg2=3.3), 5.4)
  383. def test_cdist_euclidean_random_unicode(self):
  384. eps = 1e-15
  385. X1 = eo['cdist-X1']
  386. X2 = eo['cdist-X2']
  387. Y1 = wcdist_no_const(X1, X2, 'euclidean')
  388. Y2 = wcdist_no_const(X1, X2, 'test_euclidean')
  389. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  390. @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 1.23,
  391. 2.0, 3.8, 4.6, np.inf])
  392. def test_cdist_minkowski_random(self, p):
  393. eps = 1e-13
  394. X1 = eo['cdist-X1']
  395. X2 = eo['cdist-X2']
  396. Y1 = wcdist_no_const(X1, X2, 'minkowski', p=p)
  397. Y2 = wcdist_no_const(X1, X2, 'test_minkowski', p=p)
  398. assert_allclose(Y1, Y2, atol=0, rtol=eps, verbose=verbose > 2)
  399. def test_cdist_cosine_random(self):
  400. eps = 1e-14
  401. X1 = eo['cdist-X1']
  402. X2 = eo['cdist-X2']
  403. Y1 = wcdist(X1, X2, 'cosine')
  404. # Naive implementation
  405. def norms(X):
  406. return np.linalg.norm(X, axis=1).reshape(-1, 1)
  407. Y2 = 1 - np.dot((X1 / norms(X1)), (X2 / norms(X2)).T)
  408. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  409. def test_cdist_mahalanobis(self):
  410. # 1-dimensional observations
  411. x1 = np.array([[2], [3]])
  412. x2 = np.array([[2], [5]])
  413. dist = cdist(x1, x2, metric='mahalanobis')
  414. assert_allclose(dist, [[0.0, np.sqrt(4.5)], [np.sqrt(0.5), np.sqrt(2)]])
  415. # 2-dimensional observations
  416. x1 = np.array([[0, 0], [-1, 0]])
  417. x2 = np.array([[0, 2], [1, 0], [0, -2]])
  418. dist = cdist(x1, x2, metric='mahalanobis')
  419. rt2 = np.sqrt(2)
  420. assert_allclose(dist, [[rt2, rt2, rt2], [2, 2 * rt2, 2]])
  421. # Too few observations
  422. assert_raises(ValueError,
  423. cdist, [[0, 1]], [[2, 3]], metric='mahalanobis')
  424. def test_cdist_custom_notdouble(self):
  425. class myclass:
  426. pass
  427. def _my_metric(x, y):
  428. if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
  429. raise ValueError("Type has been changed")
  430. return 1.123
  431. data = np.array([[myclass()]], dtype=object)
  432. cdist_y = cdist(data, data, metric=_my_metric)
  433. right_y = 1.123
  434. assert_equal(cdist_y, right_y, verbose=verbose > 2)
  435. def _check_calling_conventions(self, X1, X2, metric, eps=1e-07, **kwargs):
  436. # helper function for test_cdist_calling_conventions
  437. try:
  438. y1 = cdist(X1, X2, metric=metric, **kwargs)
  439. y2 = cdist(X1, X2, metric=eval(metric), **kwargs)
  440. y3 = cdist(X1, X2, metric="test_" + metric, **kwargs)
  441. except Exception as e:
  442. e_cls = e.__class__
  443. if verbose > 2:
  444. print(e_cls.__name__)
  445. print(e)
  446. assert_raises(e_cls, cdist, X1, X2, metric=metric, **kwargs)
  447. assert_raises(e_cls, cdist, X1, X2, metric=eval(metric), **kwargs)
  448. assert_raises(e_cls, cdist, X1, X2, metric="test_" + metric, **kwargs)
  449. else:
  450. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  451. assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)
  452. def test_cdist_calling_conventions(self):
  453. # Ensures that specifying the metric with a str or scipy function
  454. # gives the same behaviour (i.e. same result or same exception).
  455. # NOTE: The correctness should be checked within each metric tests.
  456. for eo_name in self.rnd_eo_names:
  457. # subsampling input data to speed-up tests
  458. # NOTE: num samples needs to be > than dimensions for mahalanobis
  459. X1 = eo[eo_name][::5, ::-2]
  460. X2 = eo[eo_name][1::5, ::2]
  461. for metric in _METRICS_NAMES:
  462. if verbose > 2:
  463. print("testing: ", metric, " with: ", eo_name)
  464. if metric in {'dice', 'yule', 'kulsinski',
  465. 'rogerstanimoto',
  466. 'russellrao', 'sokalmichener',
  467. 'sokalsneath',
  468. 'kulczynski1'} and 'bool' not in eo_name:
  469. # python version permits non-bools e.g. for fuzzy logic
  470. continue
  471. with np.testing.suppress_warnings() as sup:
  472. if metric == "kulsinski":
  473. sup.filter(DeprecationWarning,
  474. "Kulsinski has been deprecated from")
  475. self._check_calling_conventions(X1, X2, metric)
  476. # Testing built-in metrics with extra args
  477. if metric == "seuclidean":
  478. X12 = np.vstack([X1, X2]).astype(np.double)
  479. V = np.var(X12, axis=0, ddof=1)
  480. self._check_calling_conventions(X1, X2, metric, V=V)
  481. elif metric == "mahalanobis":
  482. X12 = np.vstack([X1, X2]).astype(np.double)
  483. V = np.atleast_2d(np.cov(X12.T))
  484. VI = np.array(np.linalg.inv(V).T)
  485. self._check_calling_conventions(X1, X2, metric, VI=VI)
  486. def test_cdist_dtype_equivalence(self):
  487. # Tests that the result is not affected by type up-casting
  488. eps = 1e-07
  489. tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
  490. (eo['random-uint-data'], self.valid_upcasts['uint']),
  491. (eo['random-int-data'], self.valid_upcasts['int']),
  492. (eo['random-float32-data'], self.valid_upcasts['float32'])]
  493. for metric in _METRICS_NAMES:
  494. for test in tests:
  495. X1 = test[0][::5, ::-2]
  496. X2 = test[0][1::5, ::2]
  497. try:
  498. y1 = cdist(X1, X2, metric=metric)
  499. except Exception as e:
  500. e_cls = e.__class__
  501. if verbose > 2:
  502. print(e_cls.__name__)
  503. print(e)
  504. for new_type in test[1]:
  505. X1new = new_type(X1)
  506. X2new = new_type(X2)
  507. assert_raises(e_cls, cdist, X1new, X2new, metric=metric)
  508. else:
  509. for new_type in test[1]:
  510. y2 = cdist(new_type(X1), new_type(X2), metric=metric)
  511. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  512. def test_cdist_out(self):
  513. # Test that out parameter works properly
  514. eps = 1e-15
  515. X1 = eo['cdist-X1']
  516. X2 = eo['cdist-X2']
  517. out_r, out_c = X1.shape[0], X2.shape[0]
  518. for metric in _METRICS_NAMES:
  519. kwargs = dict()
  520. if metric == 'minkowski':
  521. kwargs['p'] = 1.23
  522. out1 = np.empty((out_r, out_c), dtype=np.double)
  523. Y1 = cdist(X1, X2, metric, **kwargs)
  524. Y2 = cdist(X1, X2, metric, out=out1, **kwargs)
  525. # test that output is numerically equivalent
  526. assert_allclose(Y1, Y2, rtol=eps, verbose=verbose > 2)
  527. # test that Y_test1 and out1 are the same object
  528. assert_(Y2 is out1)
  529. # test for incorrect shape
  530. out2 = np.empty((out_r-1, out_c+1), dtype=np.double)
  531. assert_raises(ValueError,
  532. cdist, X1, X2, metric, out=out2, **kwargs)
  533. # test for C-contiguous order
  534. out3 = np.empty(
  535. (2 * out_r, 2 * out_c), dtype=np.double)[::2, ::2]
  536. out4 = np.empty((out_r, out_c), dtype=np.double, order='F')
  537. assert_raises(ValueError,
  538. cdist, X1, X2, metric, out=out3, **kwargs)
  539. assert_raises(ValueError,
  540. cdist, X1, X2, metric, out=out4, **kwargs)
  541. # test for incorrect dtype
  542. out5 = np.empty((out_r, out_c), dtype=np.int64)
  543. assert_raises(ValueError,
  544. cdist, X1, X2, metric, out=out5, **kwargs)
  def test_striding(self):
      # Strided (non C-contiguous) views must produce the same distances as
      # contiguous copies of the same data; per the original note this
      # exercises the calls to _copy_array_if_base_present.
      strided_a = eo['cdist-X1'][::2, ::2]
      strided_b = eo['cdist-X2'][::2, ::2]
      packed_a = strided_a.copy()
      packed_b = strided_b.copy()

      # The copies hold the same values as the views...
      for view, packed in ((strided_a, packed_a), (strided_b, packed_b)):
          assert_equal(view, packed)

      # ...but only the copies are C-contiguous.
      for view in (strided_a, strided_b):
          assert_(not view.flags.c_contiguous)
      for packed in (packed_a, packed_b):
          assert_(packed.flags.c_contiguous)

      for metric in _METRICS_NAMES:
          # minkowski is run with an explicit, non-default exponent.
          extra = {'p': 1.23} if metric == 'minkowski' else {}
          from_views = cdist(strided_a, strided_b, metric, **extra)
          from_copies = cdist(packed_a, packed_b, metric, **extra)
          assert_allclose(from_views, from_copies, rtol=1e-15,
                          verbose=verbose > 2)
  def test_cdist_refcount(self):
      # After a cdist call, dropping our own names for the inputs and the
      # result must leave nothing alive: if only weak references remain,
      # the arrays should have been deallocated.
      for metric in _METRICS_NAMES:
          lhs = np.random.rand(10, 10)
          rhs = np.random.rand(10, 10)

          # minkowski is run with an explicit, non-default exponent.
          extra = {'p': 1.23} if metric == 'minkowski' else {}

          result = cdist(lhs, rhs, metric=metric, **extra)

          probes = [weakref.ref(obj) for obj in (lhs, rhs, result)]
          del lhs, rhs, result

          if IS_PYPY:
              # On PyPy the helper is invoked before checking the probes.
              break_cycles()
          assert all(probe() is None for probe in probes)
  584. class TestPdist:
  585. def setup_method(self):
  586. self.rnd_eo_names = ['random-float32-data', 'random-int-data',
  587. 'random-uint-data', 'random-double-data',
  588. 'random-bool-data']
  589. self.valid_upcasts = {'bool': [np.uint, np.int_, np.float32, np.double],
  590. 'uint': [np.int_, np.float32, np.double],
  591. 'int': [np.float32, np.double],
  592. 'float32': [np.double]}
  593. def test_pdist_extra_args(self):
  594. # Tests that args and kwargs are correctly handled
  595. def _my_metric(x, y, arg, kwarg=1, kwarg2=2):
  596. return arg + kwarg + kwarg2
  597. X1 = [[1., 2.], [1.2, 2.3], [2.2, 2.3]]
  598. kwargs = {'N0tV4l1D_p4raM': 3.14, "w":np.arange(2)}
  599. args = [3.14] * 200
  600. for metric in _METRICS_NAMES:
  601. with np.testing.suppress_warnings() as sup:
  602. if metric == "kulsinski":
  603. sup.filter(DeprecationWarning,
  604. "Kulsinski has been deprecated from")
  605. assert_raises(TypeError, pdist, X1, metric=metric, **kwargs)
  606. assert_raises(TypeError, pdist, X1,
  607. metric=eval(metric), **kwargs)
  608. assert_raises(TypeError, pdist, X1,
  609. metric="test_" + metric, **kwargs)
  610. assert_raises(TypeError, pdist, X1, metric=metric, *args)
  611. assert_raises(TypeError, pdist, X1, metric=eval(metric), *args)
  612. assert_raises(TypeError, pdist, X1,
  613. metric="test_" + metric, *args)
  614. assert_raises(TypeError, pdist, X1, _my_metric)
  615. assert_raises(TypeError, pdist, X1, _my_metric, *args)
  616. assert_raises(TypeError, pdist, X1, _my_metric, **kwargs)
  617. assert_raises(TypeError, pdist, X1, _my_metric,
  618. kwarg=2.2, kwarg2=3.3)
  619. assert_raises(TypeError, pdist, X1, _my_metric, 1, 2, kwarg=2.2)
  620. assert_raises(TypeError, pdist, X1, _my_metric, 1.1, 2.2, 3.3)
  621. assert_raises(TypeError, pdist, X1, _my_metric, 1.1, 2.2)
  622. assert_raises(TypeError, pdist, X1, _my_metric, 1.1)
  623. assert_raises(TypeError, pdist, X1, _my_metric, 1.1,
  624. kwarg=2.2, kwarg2=3.3)
  625. # these should work
  626. assert_allclose(pdist(X1, metric=_my_metric,
  627. arg=1.1, kwarg2=3.3), 5.4)
  628. def test_pdist_euclidean_random(self):
  629. eps = 1e-07
  630. X = eo['pdist-double-inp']
  631. Y_right = eo['pdist-euclidean']
  632. Y_test1 = wpdist_no_const(X, 'euclidean')
  633. assert_allclose(Y_test1, Y_right, rtol=eps)
  634. def test_pdist_euclidean_random_u(self):
  635. eps = 1e-07
  636. X = eo['pdist-double-inp']
  637. Y_right = eo['pdist-euclidean']
  638. Y_test1 = wpdist_no_const(X, 'euclidean')
  639. assert_allclose(Y_test1, Y_right, rtol=eps)
  640. def test_pdist_euclidean_random_float32(self):
  641. eps = 1e-07
  642. X = np.float32(eo['pdist-double-inp'])
  643. Y_right = eo['pdist-euclidean']
  644. Y_test1 = wpdist_no_const(X, 'euclidean')
  645. assert_allclose(Y_test1, Y_right, rtol=eps)
  646. def test_pdist_euclidean_random_nonC(self):
  647. eps = 1e-07
  648. X = eo['pdist-double-inp']
  649. Y_right = eo['pdist-euclidean']
  650. Y_test2 = wpdist_no_const(X, 'test_euclidean')
  651. assert_allclose(Y_test2, Y_right, rtol=eps)
  652. @pytest.mark.slow
  653. def test_pdist_euclidean_iris_double(self):
  654. eps = 1e-7
  655. X = eo['iris']
  656. Y_right = eo['pdist-euclidean-iris']
  657. Y_test1 = wpdist_no_const(X, 'euclidean')
  658. assert_allclose(Y_test1, Y_right, rtol=eps)
  659. @pytest.mark.slow
  660. def test_pdist_euclidean_iris_float32(self):
  661. eps = 1e-5
  662. X = np.float32(eo['iris'])
  663. Y_right = eo['pdist-euclidean-iris']
  664. Y_test1 = wpdist_no_const(X, 'euclidean')
  665. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  666. @pytest.mark.slow
  667. def test_pdist_euclidean_iris_nonC(self):
  668. # Test pdist(X, 'test_euclidean') [the non-C implementation] on the
  669. # Iris data set.
  670. eps = 1e-7
  671. X = eo['iris']
  672. Y_right = eo['pdist-euclidean-iris']
  673. Y_test2 = wpdist_no_const(X, 'test_euclidean')
  674. assert_allclose(Y_test2, Y_right, rtol=eps)
  675. def test_pdist_seuclidean_random(self):
  676. eps = 1e-7
  677. X = eo['pdist-double-inp']
  678. Y_right = eo['pdist-seuclidean']
  679. Y_test1 = pdist(X, 'seuclidean')
  680. assert_allclose(Y_test1, Y_right, rtol=eps)
  681. def test_pdist_seuclidean_random_float32(self):
  682. eps = 1e-7
  683. X = np.float32(eo['pdist-double-inp'])
  684. Y_right = eo['pdist-seuclidean']
  685. Y_test1 = pdist(X, 'seuclidean')
  686. assert_allclose(Y_test1, Y_right, rtol=eps)
  687. # Check no error is raise when V has float32 dtype (#11171).
  688. V = np.var(X, axis=0, ddof=1)
  689. Y_test2 = pdist(X, 'seuclidean', V=V)
  690. assert_allclose(Y_test2, Y_right, rtol=eps)
  691. def test_pdist_seuclidean_random_nonC(self):
  692. # Test pdist(X, 'test_sqeuclidean') [the non-C implementation]
  693. eps = 1e-07
  694. X = eo['pdist-double-inp']
  695. Y_right = eo['pdist-seuclidean']
  696. Y_test2 = pdist(X, 'test_seuclidean')
  697. assert_allclose(Y_test2, Y_right, rtol=eps)
  698. def test_pdist_seuclidean_iris(self):
  699. eps = 1e-7
  700. X = eo['iris']
  701. Y_right = eo['pdist-seuclidean-iris']
  702. Y_test1 = pdist(X, 'seuclidean')
  703. assert_allclose(Y_test1, Y_right, rtol=eps)
  704. def test_pdist_seuclidean_iris_float32(self):
  705. # Tests pdist(X, 'seuclidean') on the Iris data set (float32).
  706. eps = 1e-5
  707. X = np.float32(eo['iris'])
  708. Y_right = eo['pdist-seuclidean-iris']
  709. Y_test1 = pdist(X, 'seuclidean')
  710. assert_allclose(Y_test1, Y_right, rtol=eps)
  711. def test_pdist_seuclidean_iris_nonC(self):
  712. # Test pdist(X, 'test_seuclidean') [the non-C implementation] on the
  713. # Iris data set.
  714. eps = 1e-7
  715. X = eo['iris']
  716. Y_right = eo['pdist-seuclidean-iris']
  717. Y_test2 = pdist(X, 'test_seuclidean')
  718. assert_allclose(Y_test2, Y_right, rtol=eps)
  719. def test_pdist_cosine_random(self):
  720. eps = 1e-7
  721. X = eo['pdist-double-inp']
  722. Y_right = eo['pdist-cosine']
  723. Y_test1 = wpdist(X, 'cosine')
  724. assert_allclose(Y_test1, Y_right, rtol=eps)
  725. def test_pdist_cosine_random_float32(self):
  726. eps = 1e-7
  727. X = np.float32(eo['pdist-double-inp'])
  728. Y_right = eo['pdist-cosine']
  729. Y_test1 = wpdist(X, 'cosine')
  730. assert_allclose(Y_test1, Y_right, rtol=eps)
  731. def test_pdist_cosine_random_nonC(self):
  732. # Test pdist(X, 'test_cosine') [the non-C implementation]
  733. eps = 1e-7
  734. X = eo['pdist-double-inp']
  735. Y_right = eo['pdist-cosine']
  736. Y_test2 = wpdist(X, 'test_cosine')
  737. assert_allclose(Y_test2, Y_right, rtol=eps)
  738. @pytest.mark.slow
  739. def test_pdist_cosine_iris(self):
  740. eps = 1e-05
  741. X = eo['iris']
  742. Y_right = eo['pdist-cosine-iris']
  743. Y_test1 = wpdist(X, 'cosine')
  744. assert_allclose(Y_test1, Y_right, atol=eps)
  745. @pytest.mark.slow
  746. def test_pdist_cosine_iris_float32(self):
  747. eps = 1e-05
  748. X = np.float32(eo['iris'])
  749. Y_right = eo['pdist-cosine-iris']
  750. Y_test1 = wpdist(X, 'cosine')
  751. assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)
  752. @pytest.mark.slow
  753. def test_pdist_cosine_iris_nonC(self):
  754. eps = 1e-05
  755. X = eo['iris']
  756. Y_right = eo['pdist-cosine-iris']
  757. Y_test2 = wpdist(X, 'test_cosine')
  758. assert_allclose(Y_test2, Y_right, atol=eps)
  759. def test_pdist_cosine_bounds(self):
  760. # Test adapted from @joernhees's example at gh-5208: case where
  761. # cosine distance used to be negative. XXX: very sensitive to the
  762. # specific norm computation.
  763. x = np.abs(np.random.RandomState(1337).rand(91))
  764. X = np.vstack([x, x])
  765. assert_(wpdist(X, 'cosine')[0] >= 0,
  766. msg='cosine distance should be non-negative')
  767. def test_pdist_cityblock_random(self):
  768. eps = 1e-7
  769. X = eo['pdist-double-inp']
  770. Y_right = eo['pdist-cityblock']
  771. Y_test1 = wpdist_no_const(X, 'cityblock')
  772. assert_allclose(Y_test1, Y_right, rtol=eps)
  773. def test_pdist_cityblock_random_float32(self):
  774. eps = 1e-7
  775. X = np.float32(eo['pdist-double-inp'])
  776. Y_right = eo['pdist-cityblock']
  777. Y_test1 = wpdist_no_const(X, 'cityblock')
  778. assert_allclose(Y_test1, Y_right, rtol=eps)
  779. def test_pdist_cityblock_random_nonC(self):
  780. eps = 1e-7
  781. X = eo['pdist-double-inp']
  782. Y_right = eo['pdist-cityblock']
  783. Y_test2 = wpdist_no_const(X, 'test_cityblock')
  784. assert_allclose(Y_test2, Y_right, rtol=eps)
  785. @pytest.mark.slow
  786. def test_pdist_cityblock_iris(self):
  787. eps = 1e-14
  788. X = eo['iris']
  789. Y_right = eo['pdist-cityblock-iris']
  790. Y_test1 = wpdist_no_const(X, 'cityblock')
  791. assert_allclose(Y_test1, Y_right, rtol=eps)
  792. @pytest.mark.slow
  793. def test_pdist_cityblock_iris_float32(self):
  794. eps = 1e-5
  795. X = np.float32(eo['iris'])
  796. Y_right = eo['pdist-cityblock-iris']
  797. Y_test1 = wpdist_no_const(X, 'cityblock')
  798. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  799. @pytest.mark.slow
  800. def test_pdist_cityblock_iris_nonC(self):
  801. # Test pdist(X, 'test_cityblock') [the non-C implementation] on the
  802. # Iris data set.
  803. eps = 1e-14
  804. X = eo['iris']
  805. Y_right = eo['pdist-cityblock-iris']
  806. Y_test2 = wpdist_no_const(X, 'test_cityblock')
  807. assert_allclose(Y_test2, Y_right, rtol=eps)
  808. def test_pdist_correlation_random(self):
  809. eps = 1e-7
  810. X = eo['pdist-double-inp']
  811. Y_right = eo['pdist-correlation']
  812. Y_test1 = wpdist(X, 'correlation')
  813. assert_allclose(Y_test1, Y_right, rtol=eps)
  814. def test_pdist_correlation_random_float32(self):
  815. eps = 1e-7
  816. X = np.float32(eo['pdist-double-inp'])
  817. Y_right = eo['pdist-correlation']
  818. Y_test1 = wpdist(X, 'correlation')
  819. assert_allclose(Y_test1, Y_right, rtol=eps)
  820. def test_pdist_correlation_random_nonC(self):
  821. eps = 1e-7
  822. X = eo['pdist-double-inp']
  823. Y_right = eo['pdist-correlation']
  824. Y_test2 = wpdist(X, 'test_correlation')
  825. assert_allclose(Y_test2, Y_right, rtol=eps)
  826. @pytest.mark.slow
  827. def test_pdist_correlation_iris(self):
  828. eps = 1e-7
  829. X = eo['iris']
  830. Y_right = eo['pdist-correlation-iris']
  831. Y_test1 = wpdist(X, 'correlation')
  832. assert_allclose(Y_test1, Y_right, rtol=eps)
  833. @pytest.mark.slow
  834. def test_pdist_correlation_iris_float32(self):
  835. eps = 1e-7
  836. X = eo['iris']
  837. Y_right = np.float32(eo['pdist-correlation-iris'])
  838. Y_test1 = wpdist(X, 'correlation')
  839. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  840. @pytest.mark.slow
  841. def test_pdist_correlation_iris_nonC(self):
  842. if sys.maxsize > 2**32:
  843. eps = 1e-7
  844. else:
  845. pytest.skip("see gh-16456")
  846. X = eo['iris']
  847. Y_right = eo['pdist-correlation-iris']
  848. Y_test2 = wpdist(X, 'test_correlation')
  849. assert_allclose(Y_test2, Y_right, rtol=eps)
  850. @pytest.mark.parametrize("p", [0.1, 0.25, 1.0, 2.0, 3.2, np.inf])
  851. def test_pdist_minkowski_random_p(self, p):
  852. eps = 1e-13
  853. X = eo['pdist-double-inp']
  854. Y1 = wpdist_no_const(X, 'minkowski', p=p)
  855. Y2 = wpdist_no_const(X, 'test_minkowski', p=p)
  856. assert_allclose(Y1, Y2, atol=0, rtol=eps)
  857. def test_pdist_minkowski_random(self):
  858. eps = 1e-7
  859. X = eo['pdist-double-inp']
  860. Y_right = eo['pdist-minkowski-3.2']
  861. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  862. assert_allclose(Y_test1, Y_right, rtol=eps)
  863. def test_pdist_minkowski_random_float32(self):
  864. eps = 1e-7
  865. X = np.float32(eo['pdist-double-inp'])
  866. Y_right = eo['pdist-minkowski-3.2']
  867. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  868. assert_allclose(Y_test1, Y_right, rtol=eps)
  869. def test_pdist_minkowski_random_nonC(self):
  870. eps = 1e-7
  871. X = eo['pdist-double-inp']
  872. Y_right = eo['pdist-minkowski-3.2']
  873. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
  874. assert_allclose(Y_test2, Y_right, rtol=eps)
  875. @pytest.mark.slow
  876. def test_pdist_minkowski_3_2_iris(self):
  877. eps = 1e-7
  878. X = eo['iris']
  879. Y_right = eo['pdist-minkowski-3.2-iris']
  880. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  881. assert_allclose(Y_test1, Y_right, rtol=eps)
  882. @pytest.mark.slow
  883. def test_pdist_minkowski_3_2_iris_float32(self):
  884. eps = 1e-5
  885. X = np.float32(eo['iris'])
  886. Y_right = eo['pdist-minkowski-3.2-iris']
  887. Y_test1 = wpdist_no_const(X, 'minkowski', p=3.2)
  888. assert_allclose(Y_test1, Y_right, rtol=eps)
  889. @pytest.mark.slow
  890. def test_pdist_minkowski_3_2_iris_nonC(self):
  891. eps = 1e-7
  892. X = eo['iris']
  893. Y_right = eo['pdist-minkowski-3.2-iris']
  894. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=3.2)
  895. assert_allclose(Y_test2, Y_right, rtol=eps)
  896. @pytest.mark.slow
  897. def test_pdist_minkowski_5_8_iris(self):
  898. eps = 1e-7
  899. X = eo['iris']
  900. Y_right = eo['pdist-minkowski-5.8-iris']
  901. Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
  902. assert_allclose(Y_test1, Y_right, rtol=eps)
  903. @pytest.mark.slow
  904. def test_pdist_minkowski_5_8_iris_float32(self):
  905. eps = 1e-5
  906. X = np.float32(eo['iris'])
  907. Y_right = eo['pdist-minkowski-5.8-iris']
  908. Y_test1 = wpdist_no_const(X, 'minkowski', p=5.8)
  909. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  910. @pytest.mark.slow
  911. def test_pdist_minkowski_5_8_iris_nonC(self):
  912. eps = 1e-7
  913. X = eo['iris']
  914. Y_right = eo['pdist-minkowski-5.8-iris']
  915. Y_test2 = wpdist_no_const(X, 'test_minkowski', p=5.8)
  916. assert_allclose(Y_test2, Y_right, rtol=eps)
  917. def test_pdist_mahalanobis(self):
  918. # 1-dimensional observations
  919. x = np.array([2.0, 2.0, 3.0, 5.0]).reshape(-1, 1)
  920. dist = pdist(x, metric='mahalanobis')
  921. assert_allclose(dist, [0.0, np.sqrt(0.5), np.sqrt(4.5),
  922. np.sqrt(0.5), np.sqrt(4.5), np.sqrt(2.0)])
  923. # 2-dimensional observations
  924. x = np.array([[0, 0], [-1, 0], [0, 2], [1, 0], [0, -2]])
  925. dist = pdist(x, metric='mahalanobis')
  926. rt2 = np.sqrt(2)
  927. assert_allclose(dist, [rt2, rt2, rt2, rt2, 2, 2 * rt2, 2, 2, 2 * rt2, 2])
  928. # Too few observations
  929. assert_raises(ValueError,
  930. wpdist, [[0, 1], [2, 3]], metric='mahalanobis')
  931. def test_pdist_hamming_random(self):
  932. eps = 1e-15
  933. X = eo['pdist-boolean-inp']
  934. Y_right = eo['pdist-hamming']
  935. Y_test1 = wpdist(X, 'hamming')
  936. assert_allclose(Y_test1, Y_right, rtol=eps)
  937. def test_pdist_hamming_random_float32(self):
  938. eps = 1e-15
  939. X = np.float32(eo['pdist-boolean-inp'])
  940. Y_right = eo['pdist-hamming']
  941. Y_test1 = wpdist(X, 'hamming')
  942. assert_allclose(Y_test1, Y_right, rtol=eps)
  943. def test_pdist_hamming_random_nonC(self):
  944. eps = 1e-15
  945. X = eo['pdist-boolean-inp']
  946. Y_right = eo['pdist-hamming']
  947. Y_test2 = wpdist(X, 'test_hamming')
  948. assert_allclose(Y_test2, Y_right, rtol=eps)
  949. def test_pdist_dhamming_random(self):
  950. eps = 1e-15
  951. X = np.float64(eo['pdist-boolean-inp'])
  952. Y_right = eo['pdist-hamming']
  953. Y_test1 = wpdist(X, 'hamming')
  954. assert_allclose(Y_test1, Y_right, rtol=eps)
  955. def test_pdist_dhamming_random_float32(self):
  956. eps = 1e-15
  957. X = np.float32(eo['pdist-boolean-inp'])
  958. Y_right = eo['pdist-hamming']
  959. Y_test1 = wpdist(X, 'hamming')
  960. assert_allclose(Y_test1, Y_right, rtol=eps)
  961. def test_pdist_dhamming_random_nonC(self):
  962. eps = 1e-15
  963. X = np.float64(eo['pdist-boolean-inp'])
  964. Y_right = eo['pdist-hamming']
  965. Y_test2 = wpdist(X, 'test_hamming')
  966. assert_allclose(Y_test2, Y_right, rtol=eps)
  967. def test_pdist_jaccard_random(self):
  968. eps = 1e-8
  969. X = eo['pdist-boolean-inp']
  970. Y_right = eo['pdist-jaccard']
  971. Y_test1 = wpdist(X, 'jaccard')
  972. assert_allclose(Y_test1, Y_right, rtol=eps)
  973. def test_pdist_jaccard_random_float32(self):
  974. eps = 1e-8
  975. X = np.float32(eo['pdist-boolean-inp'])
  976. Y_right = eo['pdist-jaccard']
  977. Y_test1 = wpdist(X, 'jaccard')
  978. assert_allclose(Y_test1, Y_right, rtol=eps)
  979. def test_pdist_jaccard_random_nonC(self):
  980. eps = 1e-8
  981. X = eo['pdist-boolean-inp']
  982. Y_right = eo['pdist-jaccard']
  983. Y_test2 = wpdist(X, 'test_jaccard')
  984. assert_allclose(Y_test2, Y_right, rtol=eps)
  985. def test_pdist_djaccard_random(self):
  986. eps = 1e-8
  987. X = np.float64(eo['pdist-boolean-inp'])
  988. Y_right = eo['pdist-jaccard']
  989. Y_test1 = wpdist(X, 'jaccard')
  990. assert_allclose(Y_test1, Y_right, rtol=eps)
  991. def test_pdist_djaccard_random_float32(self):
  992. eps = 1e-8
  993. X = np.float32(eo['pdist-boolean-inp'])
  994. Y_right = eo['pdist-jaccard']
  995. Y_test1 = wpdist(X, 'jaccard')
  996. assert_allclose(Y_test1, Y_right, rtol=eps)
  997. def test_pdist_djaccard_allzeros(self):
  998. eps = 1e-15
  999. Y = pdist(np.zeros((5, 3)), 'jaccard')
  1000. assert_allclose(np.zeros(10), Y, rtol=eps)
  1001. def test_pdist_djaccard_random_nonC(self):
  1002. eps = 1e-8
  1003. X = np.float64(eo['pdist-boolean-inp'])
  1004. Y_right = eo['pdist-jaccard']
  1005. Y_test2 = wpdist(X, 'test_jaccard')
  1006. assert_allclose(Y_test2, Y_right, rtol=eps)
  1007. def test_pdist_jensenshannon_random(self):
  1008. eps = 1e-11
  1009. X = eo['pdist-double-inp']
  1010. Y_right = eo['pdist-jensenshannon']
  1011. Y_test1 = pdist(X, 'jensenshannon')
  1012. assert_allclose(Y_test1, Y_right, rtol=eps)
  1013. def test_pdist_jensenshannon_random_float32(self):
  1014. eps = 1e-8
  1015. X = np.float32(eo['pdist-double-inp'])
  1016. Y_right = eo['pdist-jensenshannon']
  1017. Y_test1 = pdist(X, 'jensenshannon')
  1018. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1019. def test_pdist_jensenshannon_random_nonC(self):
  1020. eps = 1e-11
  1021. X = eo['pdist-double-inp']
  1022. Y_right = eo['pdist-jensenshannon']
  1023. Y_test2 = pdist(X, 'test_jensenshannon')
  1024. assert_allclose(Y_test2, Y_right, rtol=eps)
  1025. def test_pdist_jensenshannon_iris(self):
  1026. if _is_32bit():
  1027. # Test failing on 32-bit Linux on Azure otherwise, see gh-12810
  1028. eps = 2.5e-10
  1029. else:
  1030. eps = 1e-12
  1031. X = eo['iris']
  1032. Y_right = eo['pdist-jensenshannon-iris']
  1033. Y_test1 = pdist(X, 'jensenshannon')
  1034. assert_allclose(Y_test1, Y_right, atol=eps)
  1035. def test_pdist_jensenshannon_iris_float32(self):
  1036. eps = 1e-06
  1037. X = np.float32(eo['iris'])
  1038. Y_right = eo['pdist-jensenshannon-iris']
  1039. Y_test1 = pdist(X, 'jensenshannon')
  1040. assert_allclose(Y_test1, Y_right, atol=eps, verbose=verbose > 2)
  1041. def test_pdist_jensenshannon_iris_nonC(self):
  1042. eps = 5e-5
  1043. X = eo['iris']
  1044. Y_right = eo['pdist-jensenshannon-iris']
  1045. Y_test2 = pdist(X, 'test_jensenshannon')
  1046. assert_allclose(Y_test2, Y_right, rtol=eps)
  1047. def test_pdist_djaccard_allzeros_nonC(self):
  1048. eps = 1e-15
  1049. Y = pdist(np.zeros((5, 3)), 'test_jaccard')
  1050. assert_allclose(np.zeros(10), Y, rtol=eps)
  1051. def test_pdist_chebyshev_random(self):
  1052. eps = 1e-8
  1053. X = eo['pdist-double-inp']
  1054. Y_right = eo['pdist-chebyshev']
  1055. Y_test1 = pdist(X, 'chebyshev')
  1056. assert_allclose(Y_test1, Y_right, rtol=eps)
  1057. def test_pdist_chebyshev_random_float32(self):
  1058. eps = 1e-7
  1059. X = np.float32(eo['pdist-double-inp'])
  1060. Y_right = eo['pdist-chebyshev']
  1061. Y_test1 = pdist(X, 'chebyshev')
  1062. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1063. def test_pdist_chebyshev_random_nonC(self):
  1064. eps = 1e-8
  1065. X = eo['pdist-double-inp']
  1066. Y_right = eo['pdist-chebyshev']
  1067. Y_test2 = pdist(X, 'test_chebyshev')
  1068. assert_allclose(Y_test2, Y_right, rtol=eps)
  1069. def test_pdist_chebyshev_iris(self):
  1070. eps = 1e-14
  1071. X = eo['iris']
  1072. Y_right = eo['pdist-chebyshev-iris']
  1073. Y_test1 = pdist(X, 'chebyshev')
  1074. assert_allclose(Y_test1, Y_right, rtol=eps)
  1075. def test_pdist_chebyshev_iris_float32(self):
  1076. eps = 1e-5
  1077. X = np.float32(eo['iris'])
  1078. Y_right = eo['pdist-chebyshev-iris']
  1079. Y_test1 = pdist(X, 'chebyshev')
  1080. assert_allclose(Y_test1, Y_right, rtol=eps, verbose=verbose > 2)
  1081. def test_pdist_chebyshev_iris_nonC(self):
  1082. eps = 1e-14
  1083. X = eo['iris']
  1084. Y_right = eo['pdist-chebyshev-iris']
  1085. Y_test2 = pdist(X, 'test_chebyshev')
  1086. assert_allclose(Y_test2, Y_right, rtol=eps)
  1087. def test_pdist_matching_mtica1(self):
  1088. # Test matching(*,*) with mtica example #1 (nums).
  1089. m = wmatching(np.array([1, 0, 1, 1, 0]),
  1090. np.array([1, 1, 0, 1, 1]))
  1091. m2 = wmatching(np.array([1, 0, 1, 1, 0], dtype=bool),
  1092. np.array([1, 1, 0, 1, 1], dtype=bool))
  1093. assert_allclose(m, 0.6, rtol=0, atol=1e-10)
  1094. assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
  1095. def test_pdist_matching_mtica2(self):
  1096. # Test matching(*,*) with mtica example #2.
  1097. m = wmatching(np.array([1, 0, 1]),
  1098. np.array([1, 1, 0]))
  1099. m2 = wmatching(np.array([1, 0, 1], dtype=bool),
  1100. np.array([1, 1, 0], dtype=bool))
  1101. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1102. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1103. def test_pdist_jaccard_mtica1(self):
  1104. m = wjaccard(np.array([1, 0, 1, 1, 0]),
  1105. np.array([1, 1, 0, 1, 1]))
  1106. m2 = wjaccard(np.array([1, 0, 1, 1, 0], dtype=bool),
  1107. np.array([1, 1, 0, 1, 1], dtype=bool))
  1108. assert_allclose(m, 0.6, rtol=0, atol=1e-10)
  1109. assert_allclose(m2, 0.6, rtol=0, atol=1e-10)
  1110. def test_pdist_jaccard_mtica2(self):
  1111. m = wjaccard(np.array([1, 0, 1]),
  1112. np.array([1, 1, 0]))
  1113. m2 = wjaccard(np.array([1, 0, 1], dtype=bool),
  1114. np.array([1, 1, 0], dtype=bool))
  1115. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1116. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1117. def test_pdist_yule_mtica1(self):
  1118. m = wyule(np.array([1, 0, 1, 1, 0]),
  1119. np.array([1, 1, 0, 1, 1]))
  1120. m2 = wyule(np.array([1, 0, 1, 1, 0], dtype=bool),
  1121. np.array([1, 1, 0, 1, 1], dtype=bool))
  1122. if verbose > 2:
  1123. print(m)
  1124. assert_allclose(m, 2, rtol=0, atol=1e-10)
  1125. assert_allclose(m2, 2, rtol=0, atol=1e-10)
  1126. def test_pdist_yule_mtica2(self):
  1127. m = wyule(np.array([1, 0, 1]),
  1128. np.array([1, 1, 0]))
  1129. m2 = wyule(np.array([1, 0, 1], dtype=bool),
  1130. np.array([1, 1, 0], dtype=bool))
  1131. if verbose > 2:
  1132. print(m)
  1133. assert_allclose(m, 2, rtol=0, atol=1e-10)
  1134. assert_allclose(m2, 2, rtol=0, atol=1e-10)
  1135. def test_pdist_dice_mtica1(self):
  1136. m = wdice(np.array([1, 0, 1, 1, 0]),
  1137. np.array([1, 1, 0, 1, 1]))
  1138. m2 = wdice(np.array([1, 0, 1, 1, 0], dtype=bool),
  1139. np.array([1, 1, 0, 1, 1], dtype=bool))
  1140. if verbose > 2:
  1141. print(m)
  1142. assert_allclose(m, 3 / 7, rtol=0, atol=1e-10)
  1143. assert_allclose(m2, 3 / 7, rtol=0, atol=1e-10)
  1144. def test_pdist_dice_mtica2(self):
  1145. m = wdice(np.array([1, 0, 1]),
  1146. np.array([1, 1, 0]))
  1147. m2 = wdice(np.array([1, 0, 1], dtype=bool),
  1148. np.array([1, 1, 0], dtype=bool))
  1149. if verbose > 2:
  1150. print(m)
  1151. assert_allclose(m, 0.5, rtol=0, atol=1e-10)
  1152. assert_allclose(m2, 0.5, rtol=0, atol=1e-10)
  1153. def test_pdist_sokalsneath_mtica1(self):
  1154. m = sokalsneath(np.array([1, 0, 1, 1, 0]),
  1155. np.array([1, 1, 0, 1, 1]))
  1156. m2 = sokalsneath(np.array([1, 0, 1, 1, 0], dtype=bool),
  1157. np.array([1, 1, 0, 1, 1], dtype=bool))
  1158. if verbose > 2:
  1159. print(m)
  1160. assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
  1161. assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
  1162. def test_pdist_sokalsneath_mtica2(self):
  1163. m = wsokalsneath(np.array([1, 0, 1]),
  1164. np.array([1, 1, 0]))
  1165. m2 = wsokalsneath(np.array([1, 0, 1], dtype=bool),
  1166. np.array([1, 1, 0], dtype=bool))
  1167. if verbose > 2:
  1168. print(m)
  1169. assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
  1170. assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
  1171. def test_pdist_rogerstanimoto_mtica1(self):
  1172. m = wrogerstanimoto(np.array([1, 0, 1, 1, 0]),
  1173. np.array([1, 1, 0, 1, 1]))
  1174. m2 = wrogerstanimoto(np.array([1, 0, 1, 1, 0], dtype=bool),
  1175. np.array([1, 1, 0, 1, 1], dtype=bool))
  1176. if verbose > 2:
  1177. print(m)
  1178. assert_allclose(m, 3 / 4, rtol=0, atol=1e-10)
  1179. assert_allclose(m2, 3 / 4, rtol=0, atol=1e-10)
  1180. def test_pdist_rogerstanimoto_mtica2(self):
  1181. m = wrogerstanimoto(np.array([1, 0, 1]),
  1182. np.array([1, 1, 0]))
  1183. m2 = wrogerstanimoto(np.array([1, 0, 1], dtype=bool),
  1184. np.array([1, 1, 0], dtype=bool))
  1185. if verbose > 2:
  1186. print(m)
  1187. assert_allclose(m, 4 / 5, rtol=0, atol=1e-10)
  1188. assert_allclose(m2, 4 / 5, rtol=0, atol=1e-10)
  1189. def test_pdist_russellrao_mtica1(self):
  1190. m = wrussellrao(np.array([1, 0, 1, 1, 0]),
  1191. np.array([1, 1, 0, 1, 1]))
  1192. m2 = wrussellrao(np.array([1, 0, 1, 1, 0], dtype=bool),
  1193. np.array([1, 1, 0, 1, 1], dtype=bool))
  1194. if verbose > 2:
  1195. print(m)
  1196. assert_allclose(m, 3 / 5, rtol=0, atol=1e-10)
  1197. assert_allclose(m2, 3 / 5, rtol=0, atol=1e-10)
  1198. def test_pdist_russellrao_mtica2(self):
  1199. m = wrussellrao(np.array([1, 0, 1]),
  1200. np.array([1, 1, 0]))
  1201. m2 = wrussellrao(np.array([1, 0, 1], dtype=bool),
  1202. np.array([1, 1, 0], dtype=bool))
  1203. if verbose > 2:
  1204. print(m)
  1205. assert_allclose(m, 2 / 3, rtol=0, atol=1e-10)
  1206. assert_allclose(m2, 2 / 3, rtol=0, atol=1e-10)
  1207. @pytest.mark.slow
  1208. def test_pdist_canberra_match(self):
  1209. D = eo['iris']
  1210. if verbose > 2:
  1211. print(D.shape, D.dtype)
  1212. eps = 1e-15
  1213. y1 = wpdist_no_const(D, "canberra")
  1214. y2 = wpdist_no_const(D, "test_canberra")
  1215. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  1216. def test_pdist_canberra_ticket_711(self):
  1217. # Test pdist(X, 'canberra') to see if Canberra gives the right result
  1218. # as reported on gh-1238.
  1219. eps = 1e-8
  1220. pdist_y = wpdist_no_const(([3.3], [3.4]), "canberra")
  1221. right_y = 0.01492537
  1222. assert_allclose(pdist_y, right_y, atol=eps, verbose=verbose > 2)
  1223. def test_pdist_custom_notdouble(self):
  1224. # tests that when using a custom metric the data type is not altered
  1225. class myclass:
  1226. pass
  1227. def _my_metric(x, y):
  1228. if not isinstance(x[0], myclass) or not isinstance(y[0], myclass):
  1229. raise ValueError("Type has been changed")
  1230. return 1.123
  1231. data = np.array([[myclass()], [myclass()]], dtype=object)
  1232. pdist_y = pdist(data, metric=_my_metric)
  1233. right_y = 1.123
  1234. assert_equal(pdist_y, right_y, verbose=verbose > 2)
  1235. def _check_calling_conventions(self, X, metric, eps=1e-07, **kwargs):
  1236. # helper function for test_pdist_calling_conventions
  1237. try:
  1238. y1 = pdist(X, metric=metric, **kwargs)
  1239. y2 = pdist(X, metric=eval(metric), **kwargs)
  1240. y3 = pdist(X, metric="test_" + metric, **kwargs)
  1241. except Exception as e:
  1242. e_cls = e.__class__
  1243. if verbose > 2:
  1244. print(e_cls.__name__)
  1245. print(e)
  1246. assert_raises(e_cls, pdist, X, metric=metric, **kwargs)
  1247. assert_raises(e_cls, pdist, X, metric=eval(metric), **kwargs)
  1248. assert_raises(e_cls, pdist, X, metric="test_" + metric, **kwargs)
  1249. else:
  1250. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  1251. assert_allclose(y1, y3, rtol=eps, verbose=verbose > 2)
  1252. def test_pdist_calling_conventions(self):
  1253. # Ensures that specifying the metric with a str or scipy function
  1254. # gives the same behaviour (i.e. same result or same exception).
  1255. # NOTE: The correctness should be checked within each metric tests.
  1256. # NOTE: Extra args should be checked with a dedicated test
  1257. for eo_name in self.rnd_eo_names:
  1258. # subsampling input data to speed-up tests
  1259. # NOTE: num samples needs to be > than dimensions for mahalanobis
  1260. X = eo[eo_name][::5, ::2]
  1261. for metric in _METRICS_NAMES:
  1262. if verbose > 2:
  1263. print("testing: ", metric, " with: ", eo_name)
  1264. if metric in {'dice', 'yule', 'kulsinski', 'matching',
  1265. 'rogerstanimoto', 'russellrao', 'sokalmichener',
  1266. 'sokalsneath',
  1267. 'kulczynski1'} and 'bool' not in eo_name:
  1268. # python version permits non-bools e.g. for fuzzy logic
  1269. continue
  1270. with np.testing.suppress_warnings() as sup:
  1271. if metric == "kulsinski":
  1272. sup.filter(DeprecationWarning,
  1273. "Kulsinski has been deprecated from")
  1274. self._check_calling_conventions(X, metric)
  1275. # Testing built-in metrics with extra args
  1276. if metric == "seuclidean":
  1277. V = np.var(X.astype(np.double), axis=0, ddof=1)
  1278. self._check_calling_conventions(X, metric, V=V)
  1279. elif metric == "mahalanobis":
  1280. V = np.atleast_2d(np.cov(X.astype(np.double).T))
  1281. VI = np.array(np.linalg.inv(V).T)
  1282. self._check_calling_conventions(X, metric, VI=VI)
  1283. def test_pdist_dtype_equivalence(self):
  1284. # Tests that the result is not affected by type up-casting
  1285. eps = 1e-07
  1286. tests = [(eo['random-bool-data'], self.valid_upcasts['bool']),
  1287. (eo['random-uint-data'], self.valid_upcasts['uint']),
  1288. (eo['random-int-data'], self.valid_upcasts['int']),
  1289. (eo['random-float32-data'], self.valid_upcasts['float32'])]
  1290. for metric in _METRICS_NAMES:
  1291. for test in tests:
  1292. X1 = test[0][::5, ::2]
  1293. try:
  1294. y1 = pdist(X1, metric=metric)
  1295. except Exception as e:
  1296. e_cls = e.__class__
  1297. if verbose > 2:
  1298. print(e_cls.__name__)
  1299. print(e)
  1300. for new_type in test[1]:
  1301. X2 = new_type(X1)
  1302. assert_raises(e_cls, pdist, X2, metric=metric)
  1303. else:
  1304. for new_type in test[1]:
  1305. y2 = pdist(new_type(X1), metric=metric)
  1306. assert_allclose(y1, y2, rtol=eps, verbose=verbose > 2)
  def test_pdist_out(self):
      """Exercise the ``out`` argument of pdist for every metric.

      A correctly sized double buffer must be filled and returned as the
      very same object; a buffer with the wrong length, a non-contiguous
      buffer and a buffer of the wrong dtype must each raise ValueError.
      """
      rtol = 1e-15
      X = eo['random-float32-data'][::5, ::2]
      n_obs = X.shape[0]
      # length of a condensed distance matrix over n_obs observations
      out_size = int((n_obs * (n_obs - 1)) / 2)
      for metric in _METRICS_NAMES:
          kwargs = {'p': 1.23} if metric == 'minkowski' else {}

          out1 = np.empty(out_size, dtype=np.double)
          Y_right = pdist(X, metric, **kwargs)
          Y_test1 = pdist(X, metric, out=out1, **kwargs)
          # the buffer-based call agrees numerically with the plain call
          assert_allclose(Y_test1, Y_right, rtol=rtol)
          # and hands back the buffer itself, not a copy
          assert_(Y_test1 is out1)

          wrong_shape = np.empty(out_size + 3, dtype=np.double)
          # every second element of a larger buffer: not C-contiguous
          not_contiguous = np.empty(2 * out_size, dtype=np.double)[::2]
          wrong_dtype = np.empty(out_size, dtype=np.int64)
          for bad_out in (wrong_shape, not_contiguous, wrong_dtype):
              assert_raises(ValueError, pdist, X, metric, out=bad_out,
                            **kwargs)
  def test_striding(self):
      """Strided input must give the same distances as a contiguous copy.

      Targets the handling of non-contiguous arrays in calls to
      _copy_array_if_base_present.
      """
      rtol = 1e-15
      strided = eo['random-float32-data'][::5, ::2]
      contiguous = strided.copy()
      # make sure the two inputs really differ in memory layout
      assert_(not strided.flags.c_contiguous)
      assert_(contiguous.flags.c_contiguous)
      for metric in _METRICS_NAMES:
          kwargs = {'p': 1.23} if metric == 'minkowski' else {}
          Y1 = pdist(strided, metric, **kwargs)
          Y2 = pdist(contiguous, metric, **kwargs)
          # layout must not influence the numerical result
          assert_allclose(Y1, Y2, rtol=rtol, verbose=verbose > 2)
  class TestSomeDistanceFunctions:
      """Spot checks of individual distance functions on small vectors."""

      def setup_method(self):
          # 1D arrays shared by the tests below; x - y == (0, 1, -2)
          x = np.array([1.0, 2.0, 3.0])
          y = np.array([1.0, 1.0, 5.0])
          self.cases = [(x, y)]

      def test_minkowski(self):
          for x, y in self.cases:
              dist1 = minkowski(x, y, p=1)
              assert_almost_equal(dist1, 3.0)
              dist1p5 = minkowski(x, y, p=1.5)
              assert_almost_equal(dist1p5, (1.0 + 2.0**1.5)**(2. / 3))
              dist2 = minkowski(x, y, p=2)
              assert_almost_equal(dist2, 5.0 ** 0.5)
              dist0p25 = minkowski(x, y, p=0.25)
              assert_almost_equal(dist0p25, (1.0 + 2.0 ** 0.25) ** 4)

          # Check that casting input to minimum scalar type doesn't affect
          # result (issue #10262). This could be extended to more test
          # inputs with np.min_scalar_type(np.max(input_matrix)).
          a = np.array([352, 916])
          b = np.array([350, 660])
          assert_equal(minkowski(a, b),
                       minkowski(a.astype('uint16'), b.astype('uint16')))

      def test_euclidean(self):
          for x, y in self.cases:
              dist = weuclidean(x, y)
              assert_almost_equal(dist, np.sqrt(5))

      def test_sqeuclidean(self):
          for x, y in self.cases:
              dist = wsqeuclidean(x, y)
              assert_almost_equal(dist, 5.0)

      def test_cosine(self):
          for x, y in self.cases:
              dist = wcosine(x, y)
              # dot(x, y) == 18, |x|**2 == 14, |y|**2 == 27
              assert_almost_equal(
                  dist, 1.0 - 18.0 / (np.sqrt(14) * np.sqrt(27)))

      def test_correlation(self):
          # xm and ym are x and y with their means (2 and 7/3) removed
          xm = np.array([-1.0, 0, 1.0])
          ym = np.array([-4.0 / 3, -4.0 / 3, 5.0 - 7.0 / 3])
          for x, y in self.cases:
              dist = wcorrelation(x, y)
              assert_almost_equal(
                  dist, 1.0 - np.dot(xm, ym) / (norm(xm) * norm(ym)))

      def test_correlation_positive(self):
          # Regression test for gh-12320 (negative return value due to
          # rounding).  Here y == x + 1 element-wise, so the distance must
          # be a tiny non-negative number.
          x = np.array([0., 0., 0., 0., 0., 0., -2., 0., 0., 0., -2., -2., -2.,
                        0., -2., 0., -2., 0., 0., -1., -2., 0., 1., 0., 0., -2.,
                        0., 0., -2., 0., -2., -2., -2., -2., -2., -2., 0.])
          y = np.array([1., 1., 1., 1., 1., 1., -1., 1., 1., 1., -1., -1., -1.,
                        1., -1., 1., -1., 1., 1., 0., -1., 1., 2., 1., 1., -1.,
                        1., 1., -1., 1., -1., -1., -1., -1., -1., -1., 1.])
          dist = correlation(x, y)
          assert 0 <= dist <= 10 * np.finfo(np.float64).eps

      def test_mahalanobis(self):
          # The vectors come from self.cases; the former local x/y
          # definitions were dead code shadowed by the loop variables.
          vi = np.array([[2.0, 1.0, 0.0], [1.0, 2.0, 1.0], [0.0, 1.0, 2.0]])
          for x, y in self.cases:
              dist = mahalanobis(x, y, vi)
              assert_almost_equal(dist, np.sqrt(6.0))
  class TestSquareForm:
      """Tests of squareform in both directions (matrix <-> vector)."""

      # dtypes that squareform is expected to preserve
      checked_dtypes = [np.float64, np.float32, np.int32, np.int8, bool]

      def test_squareform_matrix(self):
          for dtype in self.checked_dtypes:
              self.check_squareform_matrix(dtype)

      def test_squareform_vector(self):
          for dtype in self.checked_dtypes:
              self.check_squareform_vector(dtype)

      def check_squareform_matrix(self, dtype):
          # 0x0 and 1x1 matrices both condense to an empty vector
          A = np.zeros((0, 0), dtype=dtype)
          rA = squareform(A)
          assert_equal(rA.shape, (0,))
          assert_equal(rA.dtype, dtype)

          A = np.zeros((1, 1), dtype=dtype)
          rA = squareform(A)
          assert_equal(rA.shape, (0,))
          assert_equal(rA.dtype, dtype)

          # 2x2 matrix condenses to its single off-diagonal entry
          A = np.array([[0, 4.2], [4.2, 0]], dtype=dtype)
          rA = squareform(A)
          assert_equal(rA.shape, (1,))
          assert_equal(rA.dtype, dtype)
          assert_array_equal(rA, np.array([4.2], dtype=dtype))

      def check_squareform_vector(self, dtype):
          # an empty vector expands to a 1x1 zero matrix
          v = np.zeros((0,), dtype=dtype)
          rv = squareform(v)
          assert_equal(rv.shape, (1, 1))
          assert_equal(rv.dtype, dtype)
          assert_array_equal(rv, [[0]])

          # a single distance expands to a symmetric 2x2 matrix
          v = np.array([8.3], dtype=dtype)
          rv = squareform(v)
          assert_equal(rv.shape, (2, 2))
          assert_equal(rv.dtype, dtype)
          assert_array_equal(rv, np.array([[0, 8.3], [8.3, 0]], dtype=dtype))

      def test_squareform_multi_matrix(self):
          for n in range(2, 5):
              self.check_squareform_multi_matrix(n)

      def check_squareform_multi_matrix(self, n):
          # Round-trip random pairwise distances over n observations and
          # compare the square form entry by entry with the condensed one.
          X = np.random.rand(n, 4)
          Y = wpdist_no_const(X)
          assert_equal(len(Y.shape), 1)
          A = squareform(Y)
          Yr = squareform(A)
          s = A.shape
          if verbose >= 3:
              print(A.shape, Y.shape, Yr.shape)
          assert_equal(len(s), 2)
          assert_equal(len(Yr.shape), 1)
          assert_equal(s[0], s[1])
          k = 0
          for i in range(s[0]):
              # The diagonal check used to sit in an unreachable ``else``
              # branch (j always started at i + 1); do it explicitly.
              assert_equal(A[i, i], 0)
              for j in range(i + 1, s[1]):
                  # upper triangle, row by row, follows the condensed order
                  assert_equal(A[i, j], Y[k])
                  k += 1
          # every condensed entry must have been visited exactly once
          assert_equal(k, len(Y))
  class TestNumObsY:
      """Tests of num_obs_y on condensed distance matrices."""

      def test_num_obs_y_multi_matrix(self):
          for n in range(2, 10):
              X = np.random.rand(n, 4)
              Y = wpdist_no_const(X)
              assert_equal(num_obs_y(Y), n)

      def test_num_obs_y_1(self):
          # Tests num_obs_y(y) on a condensed distance matrix over 1
          # observations. Expecting exception.
          assert_raises(ValueError, self.check_y, 1)

      def test_num_obs_y_2(self):
          # Tests num_obs_y(y) on a condensed distance matrix over 2
          # observations.
          assert_(self.check_y(2))

      def test_num_obs_y_3(self):
          assert_(self.check_y(3))

      def test_num_obs_y_4(self):
          assert_(self.check_y(4))

      def test_num_obs_y_5_10(self):
          # NOTE: despite the name, this covers 5 through 15 observations.
          for i in range(5, 16):
              self.minit(i)

      def test_num_obs_y_2_100(self):
          # Tests num_obs_y(y) on improper condensed distance matrices:
          # every length in [5, 105) that is not of the form n*(n-1)/2.
          # Expecting exception.
          valid_sizes = {n * (n - 1) // 2 for n in range(2, 16)}
          for size in range(5, 105):
              if size not in valid_sizes:
                  assert_raises(ValueError, self.bad_y, size)

      def minit(self, n):
          assert_(self.check_y(n))

      def bad_y(self, n):
          # n is a raw vector length here, not a number of observations
          y = np.random.rand(n)
          return num_obs_y(y)

      def check_y(self, n):
          return num_obs_y(self.make_y(n)) == n

      def make_y(self, n):
          # random condensed distance matrix over n observations
          return np.random.rand((n * (n - 1)) // 2)
  class TestNumObsDM:
      """Tests of num_obs_dm on square distance matrices."""

      def test_num_obs_dm_multi_matrix(self):
          for n in range(1, 10):
              X = np.random.rand(n, 4)
              Y = wpdist_no_const(X)
              A = squareform(Y)
              if verbose >= 3:
                  print(A.shape, Y.shape)
              assert_equal(num_obs_dm(A), n)

      def test_num_obs_dm_0(self):
          # Tests num_obs_dm(D) on a 0x0 distance matrix: it is accepted
          # and must report 0 observations.
          assert_(self.check_D(0))

      def test_num_obs_dm_1(self):
          # Tests num_obs_dm(D) on a 1x1 distance matrix.
          assert_(self.check_D(1))

      def test_num_obs_dm_2(self):
          assert_(self.check_D(2))

      def test_num_obs_dm_3(self):
          # previously re-tested size 2 because of a copy-paste slip
          assert_(self.check_D(3))

      def test_num_obs_dm_4(self):
          assert_(self.check_D(4))

      def check_D(self, n):
          return num_obs_dm(self.make_D(n)) == n

      def make_D(self, n):
          # random n x n matrix; only its shape matters to these tests
          return np.random.rand(n, n)
  def is_valid_dm_throw(D):
      """Run is_valid_dm on D with ``throw=True`` and return its result.

      Gives the assert_raises calls in the tests a single-argument callable.
      """
      result = is_valid_dm(D, throw=True)
      return result
  class TestIsValidDM:
      """Tests of is_valid_dm.

      Tests whose name ends in "_E" expect a ValueError when throwing is
      enabled; those ending in "_F" expect a plain ``False``; the remaining
      ones feed a proper distance matrix and expect ``True``.
      """

      def test_is_valid_dm_improper_shape_1D_E(self):
          one_dim = np.zeros((5,), dtype=np.double)
          assert_raises(ValueError, is_valid_dm_throw, one_dim)

      def test_is_valid_dm_improper_shape_1D_F(self):
          one_dim = np.zeros((5,), dtype=np.double)
          assert_equal(is_valid_dm(one_dim), False)

      def test_is_valid_dm_improper_shape_3D_E(self):
          three_dim = np.zeros((3, 3, 3), dtype=np.double)
          assert_raises(ValueError, is_valid_dm_throw, three_dim)

      def test_is_valid_dm_improper_shape_3D_F(self):
          three_dim = np.zeros((3, 3, 3), dtype=np.double)
          assert_equal(is_valid_dm(three_dim), False)

      def test_is_valid_dm_nonzero_diagonal_E(self):
          dm = squareform(np.random.rand(10))
          # spoil the whole diagonal of the 5x5 matrix
          for i in range(5):
              dm[i, i] = 2.0
          assert_raises(ValueError, is_valid_dm_throw, dm)

      def test_is_valid_dm_nonzero_diagonal_F(self):
          dm = squareform(np.random.rand(10))
          for i in range(5):
              dm[i, i] = 2.0
          assert_equal(is_valid_dm(dm), False)

      def test_is_valid_dm_asymmetric_E(self):
          dm = squareform(np.random.rand(10))
          # break symmetry in a single off-diagonal pair
          dm[1, 3] = dm[3, 1] + 1
          assert_raises(ValueError, is_valid_dm_throw, dm)

      def test_is_valid_dm_asymmetric_F(self):
          dm = squareform(np.random.rand(10))
          dm[1, 3] = dm[3, 1] + 1
          assert_equal(is_valid_dm(dm), False)

      def test_is_valid_dm_correct_1_by_1(self):
          dm = np.zeros((1, 1), dtype=np.double)
          assert_equal(is_valid_dm(dm), True)

      def test_is_valid_dm_correct_2_by_2(self):
          dm = squareform(np.random.rand(1))
          assert_equal(is_valid_dm(dm), True)

      def test_is_valid_dm_correct_3_by_3(self):
          dm = squareform(np.random.rand(3))
          assert_equal(is_valid_dm(dm), True)

      def test_is_valid_dm_correct_4_by_4(self):
          dm = squareform(np.random.rand(6))
          assert_equal(is_valid_dm(dm), True)

      def test_is_valid_dm_correct_5_by_5(self):
          dm = squareform(np.random.rand(10))
          assert_equal(is_valid_dm(dm), True)
  def is_valid_y_throw(y):
      """Run is_valid_y on y with ``throw=True`` and return its result.

      Gives the assert_raises calls in the tests a single-argument callable.
      """
      result = is_valid_y(y, throw=True)
      return result
  class TestIsValidY:
      # If test case name ends on "_E" then an exception is expected for the
      # given input, if it ends in "_F" then False is expected for the
      # is_valid_y check. Otherwise the input is expected to be valid.

      def test_is_valid_y_improper_shape_2D_E(self):
          y = np.zeros((3, 3,), dtype=np.double)
          assert_raises(ValueError, is_valid_y_throw, y)

      def test_is_valid_y_improper_shape_2D_F(self):
          y = np.zeros((3, 3,), dtype=np.double)
          assert_equal(is_valid_y(y), False)

      def test_is_valid_y_improper_shape_3D_E(self):
          y = np.zeros((3, 3, 3), dtype=np.double)
          assert_raises(ValueError, is_valid_y_throw, y)

      def test_is_valid_y_improper_shape_3D_F(self):
          y = np.zeros((3, 3, 3), dtype=np.double)
          assert_equal(is_valid_y(y), False)

      def test_is_valid_y_correct_2_by_2(self):
          y = self.correct_n_by_n(2)
          assert_equal(is_valid_y(y), True)

      def test_is_valid_y_correct_3_by_3(self):
          y = self.correct_n_by_n(3)
          assert_equal(is_valid_y(y), True)

      def test_is_valid_y_correct_4_by_4(self):
          y = self.correct_n_by_n(4)
          assert_equal(is_valid_y(y), True)

      def test_is_valid_y_correct_5_by_5(self):
          y = self.correct_n_by_n(5)
          assert_equal(is_valid_y(y), True)

      def test_is_valid_y_2_100(self):
          # Every length in [5, 105) that is not of the form n*(n-1)/2 is
          # an improper condensed distance matrix and must be rejected.
          valid_sizes = {n * (n - 1) // 2 for n in range(2, 16)}
          for size in range(5, 105):
              if size not in valid_sizes:
                  assert_raises(ValueError, self.bad_y, size)

      def bad_y(self, n):
          # n is a raw vector length here, not a number of observations
          y = np.random.rand(n)
          return is_valid_y(y, throw=True)

      def correct_n_by_n(self, n):
          # random condensed distance matrix over n observations
          y = np.random.rand((n * (n - 1)) // 2)
          return y
  @pytest.mark.parametrize("p", [-10.0, -0.5, 0.0])
  def test_bad_p(p):
      """minkowski must raise ValueError for p <= 0, weighted or not."""
      u = [1, 2]
      v = [3, 4]
      assert_raises(ValueError, minkowski, u, v, p)
      # same check with an explicit weight vector
      assert_raises(ValueError, minkowski, u, v, p, [1, 1])
  def test_sokalsneath_all_false():
      """Regression test for ticket #876: all-False inputs raise ValueError."""
      u = [False, False, False]
      v = [False, False, False]
      assert_raises(ValueError, sokalsneath, u, v)
  def test_canberra():
      """Regression test for ticket #1430."""
      expectations = [
          ([1, 2, 3], [2, 4, 6], 1),
          ([1, 1, 0, 0], [1, 0, 1, 0], 2),
      ]
      for u, v, expected in expectations:
          assert_equal(wcanberra(u, v), expected)
  def test_braycurtis():
      """Regression test for ticket #1430."""
      expectations = [
          ([1, 2, 3], [2, 4, 6], 1. / 3),
          ([1, 1, 0, 0], [1, 0, 1, 0], 0.5),
      ]
      for u, v, expected in expectations:
          assert_almost_equal(wbraycurtis(u, v), expected, decimal=15)
  def test_euclideans():
      """Checks of weuclidean / wsqeuclidean (regression for ticket #1328).

      Covers a hand-computed value, rejection of anything that is not a
      1-D vector, and consistency between the two functions on random data.
      """
      x1 = np.array([1, 1, 1])
      x2 = np.array([0, 0, 0])

      # Basic test of the calculation.
      assert_almost_equal(wsqeuclidean(x1, x2), 3.0, decimal=14)
      assert_almost_equal(weuclidean(x1, x2), np.sqrt(3), decimal=14)

      # (1, N) and (N, 1) inputs are not flattened: they must be rejected.
      # (A stray ``, np.sqrt(3)`` left over after the first call has been
      # dropped; it only built a tuple that was thrown away.)
      with assert_raises(ValueError,
                         match="Input vector should be 1-D"):
          weuclidean(x1[np.newaxis, :], x2[np.newaxis, :])
      with assert_raises(ValueError,
                         match="Input vector should be 1-D"):
          wsqeuclidean(x1[np.newaxis, :], x2[np.newaxis, :])
      with assert_raises(ValueError,
                         match="Input vector should be 1-D"):
          wsqeuclidean(x1[:, np.newaxis], x2[:, np.newaxis])

      # Distance metrics only defined for vectors (= 1-D)
      x = np.arange(4).reshape(2, 2)
      assert_raises(ValueError, weuclidean, x, x)
      assert_raises(ValueError, wsqeuclidean, x, x)

      # Another check, with random data: euclidean**2 == sqeuclidean.
      rs = np.random.RandomState(1234567890)
      x = rs.rand(10)
      y = rs.rand(10)
      d1 = weuclidean(x, y)
      d2 = wsqeuclidean(x, y)
      assert_almost_equal(d1**2, d2, decimal=14)
  def test_hamming_unequal_length():
      """Regression test for gh-4290: unequal lengths raise ValueError."""
      short = [0, 0, 1]
      longer = [1, 0, 1, 0]
      # Used to give an AttributeError from ndarray.mean called on bool
      assert_raises(ValueError, whamming, short, longer)
  def test_hamming_string_array():
      """Hamming distance between two arrays of byte strings.

      See https://github.com/scikit-learn/scikit-learn/issues/4014
      """
      left = np.array(['eggs', 'spam', 'spam', 'eggs', 'spam', 'spam', 'spam',
                       'spam', 'spam', 'spam', 'spam', 'eggs', 'eggs', 'spam',
                       'eggs', 'eggs', 'eggs', 'eggs', 'eggs', 'spam'],
                      dtype='|S4')
      right = np.array(['eggs', 'spam', 'spam', 'eggs', 'eggs', 'spam', 'spam',
                        'spam', 'spam', 'eggs', 'spam', 'eggs', 'spam', 'eggs',
                        'spam', 'spam', 'eggs', 'spam', 'spam', 'eggs'],
                       dtype='|S4')
      # 9 of the 20 positions differ
      expected = 0.45
      assert_allclose(whamming(left, right), expected)
  def test_minkowski_w():
      """Regression test for gh-8142: ``w=None`` equals omitting ``w``."""
      row = [83.33333333, 100., 83.33333333, 100., 36.,
             60., 90., 150., 24., 48.]
      # two identical observations
      arr_in = np.array([row, row])
      explicit_none = {'metric': 'minkowski', 'p': 1, 'w': None}
      omitted = {'metric': 'minkowski', 'p': 1}
      p0 = pdist(arr_in, **explicit_none)
      c0 = cdist(arr_in, arr_in, **explicit_none)
      p1 = pdist(arr_in, **omitted)
      c1 = cdist(arr_in, arr_in, **omitted)
      assert_allclose(p0, p1, rtol=1e-15)
      assert_allclose(c0, c1, rtol=1e-15)
  def test_sqeuclidean_dtypes():
      """Check the dtype of wsqeuclidean's result for various input dtypes.

      Signed integers must come back as a floating type, unsigned integers
      must not wrap around, and floating/complex inputs keep their dtype.
      """
      x = [1, 2, 3]
      y = [4, 5, 6]

      for int_type in (np.int8, np.int16, np.int32, np.int64):
          u = np.asarray(x, dtype=int_type)
          v = np.asarray(y, dtype=int_type)
          d = wsqeuclidean(u, v)
          assert_(np.issubdtype(d.dtype, np.floating))

      for uint_type in (np.uint8, np.uint16, np.uint32, np.uint64):
          umax = np.iinfo(uint_type).max
          # the largest value against zero, in both argument orders
          d1 = wsqeuclidean([0], np.asarray([umax], dtype=uint_type))
          d2 = wsqeuclidean(np.asarray([umax], dtype=uint_type), [0])
          assert_equal(d1, d2)
          assert_equal(d1, np.float64(umax)**2)

      preserved = [np.float32, np.float64, np.complex64, np.complex128]
      # These aren't present in older numpy versions; float128 may also not
      # be present on all platforms.
      preserved.extend(getattr(np, name)
                       for name in ('float16', 'float128')
                       if hasattr(np, name))
      for kept_type in preserved:
          u = np.asarray(x, dtype=kept_type)
          v = np.asarray(y, dtype=kept_type)
          d = wsqeuclidean(u, v)
          assert_equal(d.dtype, kept_type)
  def test_sokalmichener():
      """sokalmichener must agree exactly for bool and 0/1 int inputs."""
      bool_u = [True, True, False]
      bool_v = [True, False, True]
      int_u = [int(flag) for flag in bool_u]
      int_v = [int(flag) for flag in bool_v]
      from_bools = sokalmichener(bool_u, bool_v)
      from_ints = sokalmichener(int_u, int_v)
      # These should be exactly the same.
      assert_equal(from_bools, from_ints)
  def test_sokalmichener_with_weight():
      """Weighted sokalmichener against a hand-computed value."""
      # Two positions with weights (1, 0.2):
      #   position 0: u=1, v=1 -> true/true,  weight 1
      #   position 1: u=0, v=1 -> false/true, weight 0.2
      ntf = 0 * 1 + 0 * 0.2
      nft = 0 * 1 + 1 * 0.2
      ntt = 1 * 1 + 0 * 0.2
      nff = 0 * 1 + 0 * 0.2
      mismatches = nft + ntf
      expected = 2 * mismatches / (ntt + nff + 2 * mismatches)
      assert_almost_equal(expected, 0.2857143)
      actual = sokalmichener([1, 0], [1, 1], w=[1, 0.2])
      assert_almost_equal(expected, actual)

      a1 = [False, False, True, True, True, False, False, True, True, True, True,
            True, True, False, True, False, False, False, True, True]
      a2 = [True, True, True, False, False, True, True, True, False, True,
            True, True, True, True, False, False, False, True, True, True]
      # a single weight, whatever its magnitude, must give the same value
      for scale in (0.05, 0.1, 1.0, 20.0):
          assert_almost_equal(sokalmichener(a2, a1, [scale]),
                              0.6666666666666666)
  def test_modifies_input():
      """Neither cdist nor pdist may modify its input array."""
      data = np.asarray([[1., 2., 3.],
                         [1.2, 2.3, 3.4],
                         [2.2, 2.3, 4.4],
                         [22.2, 23.3, 44.4]])
      snapshot = data.copy()
      for metric in _METRICS_NAMES:
          cdist(data, data, metric)
          pdist(data, metric)
          # compare against the copy taken before any call
          assert_array_equal(data, snapshot)
  def test_Xdist_deprecated_args():
      """cdist and pdist must raise TypeError for arguments a metric lacks.

      Two situations are covered for every metric: an extra positional
      argument, and one of the keywords ``p``, ``V`` or ``VI`` passed to a
      metric other than the one it is exempted for below.
      """
      X1 = np.asarray([[1., 2., 3.],
                       [1.2, 2.3, 3.4],
                       [2.2, 2.3, 4.4],
                       [22.2, 23.3, 44.4]])
      # keyword -> the metric for which that keyword is skipped
      exempt_metric = {"p": "minkowski", "V": "seuclidean",
                       "VI": "mahalanobis"}
      for metric in _METRICS_NAMES:
          with pytest.raises(TypeError):
              cdist(X1, X1, metric, 2.)

          with pytest.raises(TypeError):
              pdist(X1, metric, 2.)

          for arg, exempt in exempt_metric.items():
              if metric == exempt:
                  continue
              kwargs = {arg: "foo"}

              with pytest.raises(TypeError):
                  cdist(X1, X1, metric, **kwargs)

              with pytest.raises(TypeError):
                  pdist(X1, metric, **kwargs)
  def test_Xdist_non_negative_weights():
      """pdist and cdist must raise ValueError when a weight is negative."""
      X = eo['random-float32-data'][::5, ::2]
      w = np.ones(X.shape[1])
      # flip the sign of every fifth weight
      w[::5] = -w[::5]
      skipped = ['seuclidean', 'mahalanobis', 'jensenshannon']
      for metric in _METRICS_NAMES:
          if metric in skipped:
              continue
          with np.testing.suppress_warnings() as sup:
              if metric == "kulsinski":
                  sup.filter(DeprecationWarning,
                             "Kulsinski has been deprecated from")
              # Three spellings of one metric: its name, whatever object
              # that name evaluates to in this scope, and the name with a
              # "test_" prefix.
              # NOTE(review): eval() only ever sees entries of
              # _METRICS_NAMES here, never external input.
              spellings = [metric, eval(metric), "test_" + metric]
              for m in spellings:
                  assert_raises(ValueError, pdist, X, m, w=w)
                  assert_raises(ValueError, cdist, X, X, m, w=w)
  def test__validate_vector():
      """_validate_vector accepts 1-D input and rejects everything else."""
      values = [1, 2, 3]
      validated = _validate_vector(values)
      assert_array_equal(validated, values)

      # an explicit dtype is honoured
      validated = _validate_vector(values, dtype=np.float64)
      assert_array_equal(validated, values)
      assert_equal(validated.dtype, np.float64)

      # a single-element list stays 1-D
      values = [1]
      validated = _validate_vector(values)
      assert_equal(validated.ndim, 1)
      assert_equal(validated, values)

      # a scalar, a 3-D array and a nested list are all rejected
      rejected = (
          1,
          np.arange(5).reshape(1, -1, 1),
          [[1, 2], [3, 4]],
      )
      for bad in rejected:
          with assert_raises(ValueError,
                             match="Input vector should be 1-D"):
              _validate_vector(bad)
  def test_yule_all_same():
      """yule must return 0 for identical all-True vectors.

      Guards against a divide by zero when the inputs are exactly equal.
      """
      ones = np.ones((2, 6), dtype=bool)

      direct = wyule(ones[0], ones[0])
      assert direct == 0.0

      condensed = pdist(ones, 'yule')
      assert_equal(condensed, [0.0])

      pairwise = cdist(ones[:1], ones[:1], 'yule')
      assert_equal(pairwise, [[0.0]])
  def test_jensenshannon():
      """Reference values for jensenshannon, including axis and keepdims."""
      # 1-D inputs; the first case passes 2.0 as third positional argument
      assert_almost_equal(
          jensenshannon([1.0, 0.0, 0.0], [0.0, 1.0, 0.0], 2.0), 1.0)
      assert_almost_equal(
          jensenshannon([1.0, 0.0], [0.5, 0.5]), 0.46450140402245893)
      assert_almost_equal(
          jensenshannon([1.0, 0.0, 0.0], [1.0, 0.0, 0.0]), 0.0)

      # single-row 2-D inputs along each axis, with and without keepdims
      p = [[1.0, 2.0]]
      q = [[0.5, 1.5]]
      assert_almost_equal(jensenshannon(p, q, axis=0), [0.0, 0.0])
      assert_almost_equal(jensenshannon(p, q, axis=1), [0.0649045])
      assert_almost_equal(jensenshannon(p, q, axis=0, keepdims=True),
                          [[0.0, 0.0]])
      assert_almost_equal(jensenshannon(p, q, axis=1, keepdims=True),
                          [[0.0649045]])

      # 3x4 integer matrices along each axis
      a = np.array([[1, 2, 3, 4],
                    [5, 6, 7, 8],
                    [9, 10, 11, 12]])
      b = np.array([[13, 14, 15, 16],
                    [17, 18, 19, 20],
                    [21, 22, 23, 24]])
      per_column = jensenshannon(a, b, axis=0)
      assert_almost_equal(per_column,
                          [0.1954288, 0.1447697, 0.1138377, 0.0927636])
      per_row = jensenshannon(a, b, axis=1)
      assert_almost_equal(per_row, [0.1402339, 0.0399106, 0.0201815])
  def test_kulsinski_deprecation():
      """Calling kulsinski must emit the documented DeprecationWarning."""
      expected_message = (
          "Kulsinski has been deprecated from scipy.spatial.distance"
          " in SciPy 1.9.0 and it will be removed in SciPy 1.11.0."
          " It is superseded by scipy.spatial.distance.kulczynski1."
      )
      with pytest.warns(DeprecationWarning, match=expected_message):
          kulsinski([], [])