# NOTE: Please avoid the use of numpy.testing since NPYV intrinsics
# may be involved in their functionality.
import pytest, math, re
import itertools
import operator
from numpy.core._simd import targets, clear_floatstatus, get_floatstatus
from numpy.core._multiarray_umath import __cpu_baseline__

def check_floatstatus(divbyzero=False, overflow=False,
                      underflow=False, invalid=False,
                      all=False):
    #define NPY_FPE_DIVIDEBYZERO  1
    #define NPY_FPE_OVERFLOW      2
    #define NPY_FPE_UNDERFLOW     4
    #define NPY_FPE_INVALID       8
    err = get_floatstatus()
    ret = (all or divbyzero) and (err & 1) != 0
    ret |= (all or overflow) and (err & 2) != 0
    ret |= (all or underflow) and (err & 4) != 0
    ret |= (all or invalid) and (err & 8) != 0
    return ret
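
# A minimal usage sketch (this pattern appears in
# test_unary_invalid_fpexception below): clear the FP status register, run
# an intrinsic, then assert that no invalid-operation exception was raised:
#   clear_floatstatus()
#   intrin(vector)
#   assert check_floatstatus(invalid=True) == False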

class _Test_Utility:
    # submodule of the desired SIMD extension, e.g. targets["AVX512F"]
    npyv = None
    # the current data type suffix e.g. 's8'
    sfx = None
    # target name can be 'baseline' or one or more of CPU features
    target_name = None

    def __getattr__(self, attr):
        """
        To call NPYV intrinsics without the 'npyv' attribute and to
        auto-suffix intrinsics according to the class attribute 'sfx'
        """
        return getattr(self.npyv, attr + "_" + self.sfx)
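    # e.g. with sfx == 'f32', self.load(...) resolves to self.npyv.load_f32(...)
    # and self.add(...) to self.npyv.add_f32(...)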

    def _data(self, start=None, count=None, reverse=False):
        """
        Create a list of consecutive numbers according to the number of vector lanes.
        """
        if start is None:
            start = 1
        if count is None:
            count = self.nlanes
        rng = range(start, start + count)
        if reverse:
            rng = reversed(rng)
        if self._is_fp():
            return [x / 1.0 for x in rng]
        return list(rng)
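    # e.g. assuming nlanes == 4: _data() -> [1, 2, 3, 4] (floats for 'f' suffixes)
    # and _data(reverse=True) -> [4, 3, 2, 1]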

    def _is_unsigned(self):
        return self.sfx[0] == 'u'

    def _is_signed(self):
        return self.sfx[0] == 's'

    def _is_fp(self):
        return self.sfx[0] == 'f'

    def _scalar_size(self):
        return int(self.sfx[1:])

    def _int_clip(self, seq):
        if self._is_fp():
            return seq
        max_int = self._int_max()
        min_int = self._int_min()
        return [min(max(v, min_int), max_int) for v in seq]

    def _int_max(self):
        if self._is_fp():
            return None
        max_u = self._to_unsigned(self.setall(-1))[0]
        if self._is_signed():
            return max_u // 2
        return max_u

    def _int_min(self):
        if self._is_fp():
            return None
        if self._is_unsigned():
            return 0
        return -(self._int_max() + 1)
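    # e.g. for sfx == 's8': setall_s8(-1) reinterpreted as u8 gives 255, so
    # _int_max() == 255 // 2 == 127 and _int_min() == -128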

    def _true_mask(self):
        max_unsig = getattr(self.npyv, "setall_u" + self.sfx[1:])(-1)
        return max_unsig[0]

    def _to_unsigned(self, vector):
        if isinstance(vector, (list, tuple)):
            return getattr(self.npyv, "load_u" + self.sfx[1:])(vector)
        else:
            sfx = vector.__name__.replace("npyv_", "")
            if sfx[0] == "b":
                cvt_intrin = "cvt_u{0}_b{0}"
            else:
                cvt_intrin = "reinterpret_u{0}_{1}"
            return getattr(self.npyv, cvt_intrin.format(sfx[1:], sfx))(vector)
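    # e.g. an 'npyv_f32' vector goes through npyv.reinterpret_u32_f32, while
    # an 'npyv_b8' boolean vector goes through npyv.cvt_u8_b8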

    def _pinfinity(self):
        return float("inf")

    def _ninfinity(self):
        return -float("inf")

    def _nan(self):
        return float("nan")

    def _cpu_features(self):
        target = self.target_name
        if target == "baseline":
            target = __cpu_baseline__
        else:
            target = target.split('__')  # multi-target separator
        return ' '.join(target)
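    # e.g. for a hypothetical multi-target name "AVX512F__AVX2" this returns
    # "AVX512F AVX2"; for "baseline" it joins the baseline features with spaces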

class _SIMD_BOOL(_Test_Utility):
    """
    To test all boolean vector types at once
    """
    def _nlanes(self):
        return getattr(self.npyv, "nlanes_u" + self.sfx[1:])

    def _data(self, start=None, count=None, reverse=False):
        true_mask = self._true_mask()
        rng = range(self._nlanes())
        if reverse:
            rng = reversed(rng)
        return [true_mask if x % 2 else 0 for x in rng]
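    # i.e. an alternating pattern, e.g. [0, 0xFF, 0, 0xFF, ...] for b8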

    def _load_b(self, data):
        len_str = self.sfx[1:]
        load = getattr(self.npyv, "load_u" + len_str)
        cvt = getattr(self.npyv, f"cvt_b{len_str}_u{len_str}")
        return cvt(load(data))

    def test_operators_logical(self):
        """
        Logical operations for boolean types.
        Test intrinsics:
            npyv_xor_##SFX, npyv_and_##SFX, npyv_or_##SFX, npyv_not_##SFX,
            npyv_andc_b8, npyv_orc_b8, npyv_xnor_b8
        """
        data_a = self._data()
        data_b = self._data(reverse=True)
        vdata_a = self._load_b(data_a)
        vdata_b = self._load_b(data_b)

        data_and = [a & b for a, b in zip(data_a, data_b)]
        vand = getattr(self, "and")(vdata_a, vdata_b)
        assert vand == data_and

        data_or = [a | b for a, b in zip(data_a, data_b)]
        vor = getattr(self, "or")(vdata_a, vdata_b)
        assert vor == data_or

        data_xor = [a ^ b for a, b in zip(data_a, data_b)]
        vxor = getattr(self, "xor")(vdata_a, vdata_b)
        assert vxor == data_xor

        vnot = getattr(self, "not")(vdata_a)
        assert vnot == data_b

        # among the boolean types, andc, orc and xnor only support b8
        if self.sfx not in ("b8",):
            return

        data_andc = [(a & ~b) & 0xFF for a, b in zip(data_a, data_b)]
        vandc = getattr(self, "andc")(vdata_a, vdata_b)
        assert data_andc == vandc

        data_orc = [(a | ~b) & 0xFF for a, b in zip(data_a, data_b)]
        vorc = getattr(self, "orc")(vdata_a, vdata_b)
        assert data_orc == vorc

        data_xnor = [~(a ^ b) & 0xFF for a, b in zip(data_a, data_b)]
        vxnor = getattr(self, "xnor")(vdata_a, vdata_b)
        assert data_xnor == vxnor

    def test_tobits(self):
        data2bits = lambda data: sum([int(x != 0) << i for i, x in enumerate(data, 0)])
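        # e.g. data2bits([0xFF, 0, 0xFF, 0]) == 0b101: each non-zero lane sets
        # the bit at its lane index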
        for data in (self._data(), self._data(reverse=True)):
            vdata = self._load_b(data)
            data_bits = data2bits(data)
            tobits = self.tobits(vdata)
            bin_tobits = bin(tobits)
            assert bin_tobits == bin(data_bits)

    def test_pack(self):
        """
        Pack multiple vectors into one
        Test intrinsics:
            npyv_pack_b8_b16
            npyv_pack_b8_b32
            npyv_pack_b8_b64
        """
        if self.sfx not in ("b16", "b32", "b64"):
            return
        # create the vectors
        data = self._data()
        rdata = self._data(reverse=True)
        vdata = self._load_b(data)
        vrdata = self._load_b(rdata)
        pack_simd = getattr(self.npyv, f"pack_b8_{self.sfx}")
        # for scalar execution, concatenate the elements of the multiple lists
        # into a single list (spack), then iterate over the elements of the
        # created list, applying a mask to capture only the first byte of each.
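        # e.g. for b16, two boolean vectors are packed into a single b8 vector,
        # so the expected scalar result is rdata followed by data, with each
        # element truncated to its low byte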
  179. if self.sfx == "b16":
  180. spack = [(i & 0xFF) for i in (list(rdata) + list(data))]
  181. vpack = pack_simd(vrdata, vdata)
  182. elif self.sfx == "b32":
  183. spack = [(i & 0xFF) for i in (2*list(rdata) + 2*list(data))]
  184. vpack = pack_simd(vrdata, vrdata, vdata, vdata)
  185. elif self.sfx == "b64":
  186. spack = [(i & 0xFF) for i in (4*list(rdata) + 4*list(data))]
  187. vpack = pack_simd(vrdata, vrdata, vrdata, vrdata,
  188. vdata, vdata, vdata, vdata)
  189. assert vpack == spack

    @pytest.mark.parametrize("intrin", ["any", "all"])
    @pytest.mark.parametrize("data", (
        [-1, 0],
        [0, -1],
        [-1],
        [0]
    ))
    def test_operators_crosstest(self, intrin, data):
        """
        Test intrinsics:
            npyv_any_##SFX
            npyv_all_##SFX
        """
        data_a = self._load_b(data * self._nlanes())
        func = eval(intrin)
        intrin = getattr(self, intrin)
        desired = func(data_a)
        simd = intrin(data_a)
        assert not not simd == desired

class _SIMD_INT(_Test_Utility):
    """
    To test all integer vector types at once
    """
    def test_operators_shift(self):
        if self.sfx in ("u8", "s8"):
            return

        data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        for count in range(self._scalar_size()):
            # load to cast
            data_shl_a = self.load([a << count for a in data_a])
            # left shift
            shl = self.shl(vdata_a, count)
            assert shl == data_shl_a
            # load to cast
            data_shr_a = self.load([a >> count for a in data_a])
            # right shift
            shr = self.shr(vdata_a, count)
            assert shr == data_shr_a

        # shifting by zero or by a max/out-of-range immediate constant is
        # not applicable and illogical
        for count in range(1, self._scalar_size()):
            # load to cast
            data_shl_a = self.load([a << count for a in data_a])
            # left shift by an immediate constant
            shli = self.shli(vdata_a, count)
            assert shli == data_shl_a
            # load to cast
            data_shr_a = self.load([a >> count for a in data_a])
            # right shift by an immediate constant
            shri = self.shri(vdata_a, count)
            assert shri == data_shr_a

    def test_arithmetic_subadd_saturated(self):
        if self.sfx in ("u32", "s32", "u64", "s64"):
            return

        data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        data_adds = self._int_clip([a + b for a, b in zip(data_a, data_b)])
        adds = self.adds(vdata_a, vdata_b)
        assert adds == data_adds

        data_subs = self._int_clip([a - b for a, b in zip(data_a, data_b)])
        subs = self.subs(vdata_a, vdata_b)
        assert subs == data_subs

    def test_math_max_min(self):
        data_a = self._data()
        data_b = self._data(self.nlanes)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        data_max = [max(a, b) for a, b in zip(data_a, data_b)]
        simd_max = self.max(vdata_a, vdata_b)
        assert simd_max == data_max

        data_min = [min(a, b) for a, b in zip(data_a, data_b)]
        simd_min = self.min(vdata_a, vdata_b)
        assert simd_min == data_min

    @pytest.mark.parametrize("start", [-100, -10000, 0, 100, 10000])
    def test_reduce_max_min(self, start):
        """
        Test intrinsics:
            npyv_reduce_max_##sfx
            npyv_reduce_min_##sfx
        """
        vdata_a = self.load(self._data(start))
        assert self.reduce_max(vdata_a) == max(vdata_a)
        assert self.reduce_min(vdata_a) == min(vdata_a)

class _SIMD_FP32(_Test_Utility):
    """
    To only test single precision
    """
    def test_conversions(self):
        """
        Round to nearest even integer, assume CPU control register is set to rounding.
        Test intrinsics:
            npyv_round_s32_##SFX
        """
        features = self._cpu_features()
        if not self.npyv.simd_f64 and re.match(r".*(NEON|ASIMD)", features):
            # emulating round-to-nearest-even is very costly on Armv7;
            # instead, round halves away from zero, e.g. 0.5 -> 1, -0.5 -> -1
            _round = lambda v: int(v + (0.5 if v >= 0 else -0.5))
        else:
            _round = round
        vdata_a = self.load(self._data())
        vdata_a = self.sub(vdata_a, self.setall(0.5))
        data_round = [_round(x) for x in vdata_a]
        vround = self.round_s32(vdata_a)
        assert vround == data_round

class _SIMD_FP64(_Test_Utility):
    """
    To only test double precision
    """
    def test_conversions(self):
        """
        Round to nearest even integer, assume CPU control register is set to rounding.
        Test intrinsics:
            npyv_round_s32_##SFX
        """
        vdata_a = self.load(self._data())
        vdata_a = self.sub(vdata_a, self.setall(0.5))
        vdata_b = self.mul(vdata_a, self.setall(-1.5))
        data_round = [round(x) for x in list(vdata_a) + list(vdata_b)]
        vround = self.round_s32(vdata_a, vdata_b)
        assert vround == data_round

class _SIMD_FP(_Test_Utility):
    """
    To test all float vector types at once
    """
    def test_arithmetic_fused(self):
        vdata_a, vdata_b, vdata_c = [self.load(self._data())]*3
        vdata_cx2 = self.add(vdata_c, vdata_c)
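        # the expected results below are derived from the fused result a*b + c:
        # a*b - c == (a*b + c) - 2c, -(a*b) + c == 2c - (a*b + c),
        # and -(a*b) - c == -(a*b + c)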
        # multiply and add, a*b + c
        data_fma = self.load([a * b + c for a, b, c in zip(vdata_a, vdata_b, vdata_c)])
        fma = self.muladd(vdata_a, vdata_b, vdata_c)
        assert fma == data_fma
        # multiply and subtract, a*b - c
        fms = self.mulsub(vdata_a, vdata_b, vdata_c)
        data_fms = self.sub(data_fma, vdata_cx2)
        assert fms == data_fms
        # negate multiply and add, -(a*b) + c
        nfma = self.nmuladd(vdata_a, vdata_b, vdata_c)
        data_nfma = self.sub(vdata_cx2, data_fma)
        assert nfma == data_nfma
        # negate multiply and subtract, -(a*b) - c
        nfms = self.nmulsub(vdata_a, vdata_b, vdata_c)
        data_nfms = self.mul(data_fma, self.setall(-1))
        assert nfms == data_nfms

    def test_abs(self):
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        data = self._data()
        vdata = self.load(self._data())

        abs_cases = ((-0, 0), (ninf, pinf), (pinf, pinf), (nan, nan))
        for case, desired in abs_cases:
            data_abs = [desired]*self.nlanes
            vabs = self.abs(self.setall(case))
            assert vabs == pytest.approx(data_abs, nan_ok=True)

        vabs = self.abs(self.mul(vdata, self.setall(-1)))
        assert vabs == data

    def test_sqrt(self):
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        data = self._data()
        vdata = self.load(self._data())

        sqrt_cases = ((-0.0, -0.0), (0.0, 0.0), (-1.0, nan), (ninf, nan), (pinf, pinf))
        for case, desired in sqrt_cases:
            data_sqrt = [desired]*self.nlanes
            sqrt = self.sqrt(self.setall(case))
            assert sqrt == pytest.approx(data_sqrt, nan_ok=True)

        data_sqrt = self.load([math.sqrt(x) for x in data])  # load to truncate precision
        sqrt = self.sqrt(vdata)
        assert sqrt == data_sqrt

    def test_square(self):
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        data = self._data()
        vdata = self.load(self._data())

        # square
        square_cases = ((nan, nan), (pinf, pinf), (ninf, pinf))
        for case, desired in square_cases:
            data_square = [desired]*self.nlanes
            square = self.square(self.setall(case))
            assert square == pytest.approx(data_square, nan_ok=True)

        data_square = [x*x for x in data]
        square = self.square(vdata)
        assert square == data_square

    @pytest.mark.parametrize("intrin, func", [("ceil", math.ceil),
        ("trunc", math.trunc), ("floor", math.floor), ("rint", round)])
    def test_rounding(self, intrin, func):
        """
        Test intrinsics:
            npyv_rint_##SFX
            npyv_ceil_##SFX
            npyv_trunc_##SFX
            npyv_floor_##SFX
        """
        intrin_name = intrin
        intrin = getattr(self, intrin)
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        # special cases
        round_cases = ((nan, nan), (pinf, pinf), (ninf, ninf))
        for case, desired in round_cases:
            data_round = [desired]*self.nlanes
            _round = intrin(self.setall(case))
            assert _round == pytest.approx(data_round, nan_ok=True)

        for x in range(0, 2**20, 256**2):
            for w in (-1.05, -1.10, -1.15, 1.05, 1.10, 1.15):
                data = self.load([(x+a)*w for a in range(self.nlanes)])
                data_round = [func(x) for x in data]
                _round = intrin(data)
                assert _round == data_round

        # test large numbers
        for i in (
            1.1529215045988576e+18, 4.6116860183954304e+18,
            5.902958103546122e+20, 2.3611832414184488e+21
        ):
            x = self.setall(i)
            y = intrin(x)
            data_round = [func(n) for n in x]
            assert y == data_round

        # signed zero
        if intrin_name == "floor":
            data_szero = (-0.0,)
        else:
            data_szero = (-0.0, -0.25, -0.30, -0.45, -0.5)
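        # compare through unsigned reinterpretation, since -0.0 == 0.0
        # compares equal in Python and would hide a wrong sign bit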
        for w in data_szero:
            _round = self._to_unsigned(intrin(self.setall(w)))
            data_round = self._to_unsigned(self.setall(-0.0))
            assert _round == data_round

    @pytest.mark.parametrize("intrin", [
        "max", "maxp", "maxn", "min", "minp", "minn"
    ])
    def test_max_min(self, intrin):
        """
        Test intrinsics:
            npyv_max_##sfx
            npyv_maxp_##sfx
            npyv_maxn_##sfx
            npyv_min_##sfx
            npyv_minp_##sfx
            npyv_minn_##sfx
            npyv_reduce_max_##sfx
            npyv_reduce_maxp_##sfx
            npyv_reduce_maxn_##sfx
            npyv_reduce_min_##sfx
            npyv_reduce_minp_##sfx
            npyv_reduce_minn_##sfx
        """
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        chk_nan = {"xp": 1, "np": 1, "nn": 2, "xn": 2}.get(intrin[-2:], 0)
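        # the trailing two characters of the intrinsic name select the NaN
        # policy: "maxp"/"minp" propagate the non-NaN operand (chk_nan == 1),
        # "maxn"/"minn" propagate NaN (chk_nan == 2), plain "max"/"min" -> 0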
        func = eval(intrin[:3])
        reduce_intrin = getattr(self, "reduce_" + intrin)
        intrin = getattr(self, intrin)
        hf_nlanes = self.nlanes//2

        cases = (
            ([0.0, -0.0], [-0.0, 0.0]),
            ([10, -10], [10, -10]),
            ([pinf, 10], [10, ninf]),
            ([10, pinf], [ninf, 10]),
            ([10, -10], [10, -10]),
            ([-10, 10], [-10, 10])
        )
        for op1, op2 in cases:
            vdata_a = self.load(op1*hf_nlanes)
            vdata_b = self.load(op2*hf_nlanes)
            data = func(vdata_a, vdata_b)
            simd = intrin(vdata_a, vdata_b)
            assert simd == data
            data = func(vdata_a)
            simd = reduce_intrin(vdata_a)
            assert simd == data

        if not chk_nan:
            return
        if chk_nan == 1:
            test_nan = lambda a, b: (
                b if math.isnan(a) else a if math.isnan(b) else b
            )
        else:
            test_nan = lambda a, b: (
                nan if math.isnan(a) or math.isnan(b) else b
            )
        cases = (
            (nan, 10),
            (10, nan),
            (nan, pinf),
            (pinf, nan),
            (nan, nan)
        )
        for op1, op2 in cases:
            vdata_ab = self.load([op1, op2]*hf_nlanes)
            data = test_nan(op1, op2)
            simd = reduce_intrin(vdata_ab)
            assert simd == pytest.approx(data, nan_ok=True)
            vdata_a = self.setall(op1)
            vdata_b = self.setall(op2)
            data = [data] * self.nlanes
            simd = intrin(vdata_a, vdata_b)
            assert simd == pytest.approx(data, nan_ok=True)

    def test_reciprocal(self):
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        data = self._data()
        vdata = self.load(self._data())

        recip_cases = ((nan, nan), (pinf, 0.0), (ninf, -0.0), (0.0, pinf), (-0.0, ninf))
        for case, desired in recip_cases:
            data_recip = [desired]*self.nlanes
            recip = self.recip(self.setall(case))
            assert recip == pytest.approx(data_recip, nan_ok=True)

        data_recip = self.load([1/x for x in data])  # load to truncate precision
        recip = self.recip(vdata)
        assert recip == data_recip

    def test_special_cases(self):
        """
        Compare Not NaN. Test intrinsics:
            npyv_notnan_##SFX
        """
        nnan = self.notnan(self.setall(self._nan()))
        assert nnan == [0]*self.nlanes

    @pytest.mark.parametrize("intrin_name", [
        "rint", "trunc", "ceil", "floor"
    ])
    def test_unary_invalid_fpexception(self, intrin_name):
        intrin = getattr(self, intrin_name)
        for d in [float("nan"), float("inf"), -float("inf")]:
            v = self.setall(d)
            clear_floatstatus()
            intrin(v)
            assert check_floatstatus(invalid=True) == False

    @pytest.mark.parametrize('py_comp,np_comp', [
        (operator.lt, "cmplt"),
        (operator.le, "cmple"),
        (operator.gt, "cmpgt"),
        (operator.ge, "cmpge"),
        (operator.eq, "cmpeq"),
        (operator.ne, "cmpneq")
    ])
    def test_comparison_with_nan(self, py_comp, np_comp):
        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
        mask_true = self._true_mask()

        def to_bool(vector):
            return [lane == mask_true for lane in vector]

        intrin = getattr(self, np_comp)
        cmp_cases = ((0, nan), (nan, 0), (nan, nan), (pinf, nan),
                     (ninf, nan), (-0.0, +0.0))
        for case_operand1, case_operand2 in cmp_cases:
            data_a = [case_operand1]*self.nlanes
            data_b = [case_operand2]*self.nlanes
            vdata_a = self.setall(case_operand1)
            vdata_b = self.setall(case_operand2)
            vcmp = to_bool(intrin(vdata_a, vdata_b))
            data_cmp = [py_comp(a, b) for a, b in zip(data_a, data_b)]
            assert vcmp == data_cmp

    @pytest.mark.parametrize("intrin", ["any", "all"])
    @pytest.mark.parametrize("data", (
        [float("nan"), 0],
        [0, float("nan")],
        [float("nan"), 1],
        [1, float("nan")],
        [float("nan"), float("nan")],
        [0.0, -0.0],
        [-0.0, 0.0],
        [1.0, -0.0]
    ))
    def test_operators_crosstest(self, intrin, data):
        """
        Test intrinsics:
            npyv_any_##SFX
            npyv_all_##SFX
        """
        data_a = self.load(data * self.nlanes)
        func = eval(intrin)
        intrin = getattr(self, intrin)
        desired = func(data_a)
        simd = intrin(data_a)
        assert not not simd == desired

class _SIMD_ALL(_Test_Utility):
    """
    To test all vector types at once
    """
    def test_memory_load(self):
        data = self._data()
        # unaligned load
        load_data = self.load(data)
        assert load_data == data
        # aligned load
        loada_data = self.loada(data)
        assert loada_data == data
        # stream load
        loads_data = self.loads(data)
        assert loads_data == data
        # load lower part
        loadl = self.loadl(data)
        loadl_half = list(loadl)[:self.nlanes//2]
        data_half = data[:self.nlanes//2]
        assert loadl_half == data_half
        assert loadl != data  # detect overflow

    def test_memory_store(self):
        data = self._data()
        vdata = self.load(data)
        # unaligned store
        store = [0] * self.nlanes
        self.store(store, vdata)
        assert store == data
        # aligned store
        store_a = [0] * self.nlanes
        self.storea(store_a, vdata)
        assert store_a == data
        # stream store
        store_s = [0] * self.nlanes
        self.stores(store_s, vdata)
        assert store_s == data
        # store lower part
        store_l = [0] * self.nlanes
        self.storel(store_l, vdata)
        assert store_l[:self.nlanes//2] == data[:self.nlanes//2]
        assert store_l != vdata  # detect overflow
        # store higher part
        store_h = [0] * self.nlanes
        self.storeh(store_h, vdata)
        assert store_h[:self.nlanes//2] == data[self.nlanes//2:]
        assert store_h != vdata  # detect overflow

    def test_memory_partial_load(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        data = self._data()
        lanes = list(range(1, self.nlanes + 1))
        lanes += [self.nlanes**2, self.nlanes**4]  # test out of range
        for n in lanes:
            load_till = self.load_till(data, n, 15)
            data_till = data[:n] + [15] * (self.nlanes-n)
            assert load_till == data_till
            load_tillz = self.load_tillz(data, n)
            data_tillz = data[:n] + [0] * (self.nlanes-n)
            assert load_tillz == data_tillz

    def test_memory_partial_store(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        data = self._data()
        data_rev = self._data(reverse=True)
        vdata = self.load(data)
        lanes = list(range(1, self.nlanes + 1))
        lanes += [self.nlanes**2, self.nlanes**4]
        for n in lanes:
            data_till = data_rev.copy()
            data_till[:n] = data[:n]
            store_till = self._data(reverse=True)
            self.store_till(store_till, n, vdata)
            assert store_till == data_till

    def test_memory_noncont_load(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        for stride in range(1, 64):
            data = self._data(count=stride*self.nlanes)
            data_stride = data[::stride]
            loadn = self.loadn(data, stride)
            assert loadn == data_stride
        for stride in range(-64, 0):
            data = self._data(stride, -stride*self.nlanes)
            data_stride = self.load(data[::stride])  # cast unsigned
            loadn = self.loadn(data, stride)
            assert loadn == data_stride

    def test_memory_noncont_partial_load(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        lanes = list(range(1, self.nlanes + 1))
        lanes += [self.nlanes**2, self.nlanes**4]
        for stride in range(1, 64):
            data = self._data(count=stride*self.nlanes)
            data_stride = data[::stride]
            for n in lanes:
                data_stride_till = data_stride[:n] + [15] * (self.nlanes-n)
                loadn_till = self.loadn_till(data, stride, n, 15)
                assert loadn_till == data_stride_till
                data_stride_tillz = data_stride[:n] + [0] * (self.nlanes-n)
                loadn_tillz = self.loadn_tillz(data, stride, n)
                assert loadn_tillz == data_stride_tillz
        for stride in range(-64, 0):
            data = self._data(stride, -stride*self.nlanes)
            data_stride = list(self.load(data[::stride]))  # cast unsigned
            for n in lanes:
                data_stride_till = data_stride[:n] + [15] * (self.nlanes-n)
                loadn_till = self.loadn_till(data, stride, n, 15)
                assert loadn_till == data_stride_till
                data_stride_tillz = data_stride[:n] + [0] * (self.nlanes-n)
                loadn_tillz = self.loadn_tillz(data, stride, n)
                assert loadn_tillz == data_stride_tillz

    def test_memory_noncont_store(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        vdata = self.load(self._data())
        for stride in range(1, 64):
            data = [15] * stride * self.nlanes
            data[::stride] = vdata
            storen = [15] * stride * self.nlanes
            storen += [127]*64
            self.storen(storen, stride, vdata)
            assert storen[:-64] == data
            assert storen[-64:] == [127]*64  # detect overflow
        for stride in range(-64, 0):
            data = [15] * -stride * self.nlanes
            data[::stride] = vdata
            storen = [127]*64
            storen += [15] * -stride * self.nlanes
            self.storen(storen, stride, vdata)
            assert storen[64:] == data
            assert storen[:64] == [127]*64  # detect overflow

    def test_memory_noncont_partial_store(self):
        if self.sfx in ("u8", "s8", "u16", "s16"):
            return
        data = self._data()
        vdata = self.load(data)
        lanes = list(range(1, self.nlanes + 1))
        lanes += [self.nlanes**2, self.nlanes**4]
        for stride in range(1, 64):
            for n in lanes:
                data_till = [15] * stride * self.nlanes
                data_till[::stride] = data[:n] + [15] * (self.nlanes-n)
                storen_till = [15] * stride * self.nlanes
                storen_till += [127]*64
                self.storen_till(storen_till, stride, n, vdata)
                assert storen_till[:-64] == data_till
                assert storen_till[-64:] == [127]*64  # detect overflow
        for stride in range(-64, 0):
            for n in lanes:
                data_till = [15] * -stride * self.nlanes
                data_till[::stride] = data[:n] + [15] * (self.nlanes-n)
                storen_till = [127]*64
                storen_till += [15] * -stride * self.nlanes
                self.storen_till(storen_till, stride, n, vdata)
                assert storen_till[64:] == data_till
                assert storen_till[:64] == [127]*64  # detect overflow

    @pytest.mark.parametrize("intrin, table_size, elsize", [
        ("self.lut32", 32, 32),
        ("self.lut16", 16, 64)
    ])
    def test_lut(self, intrin, table_size, elsize):
        """
        Test lookup table intrinsics:
            npyv_lut32_##sfx
            npyv_lut16_##sfx
        """
        if elsize != self._scalar_size():
            return
        intrin = eval(intrin)
        idx_intrin = getattr(self.npyv, f"setall_u{elsize}")
        table = range(0, table_size)
        for i in table:
            broadi = self.setall(i)
            idx = idx_intrin(i)
            lut = intrin(table, idx)
            assert lut == broadi

    def test_misc(self):
        broadcast_zero = self.zero()
        assert broadcast_zero == [0] * self.nlanes
        for i in range(1, 10):
            broadcasti = self.setall(i)
            assert broadcasti == [i] * self.nlanes

        data_a, data_b = self._data(), self._data(reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        # the Python level of npyv_set_* doesn't support ignoring extra
        # specified lanes or filling non-specified lanes with zero.
        vset = self.set(*data_a)
        assert vset == data_a
        # the Python level of npyv_setf_* doesn't support ignoring extra
        # specified lanes or filling non-specified lanes with the specified scalar.
        vsetf = self.setf(10, *data_a)
        assert vsetf == data_a

        # We're testing the sanity of _simd's type-vector here; the
        # reinterpret* intrinsics themselves are tested via the compiler
        # during the build of the _simd module
        sfxes = ["u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64"]
        if self.npyv.simd_f64:
            sfxes.append("f64")
        if self.npyv.simd_f32:
            sfxes.append("f32")
        for sfx in sfxes:
            vec_name = getattr(self, "reinterpret_" + sfx)(vdata_a).__name__
            assert vec_name == "npyv_" + sfx

        # select & mask operations
        select_a = self.select(self.cmpeq(self.zero(), self.zero()), vdata_a, vdata_b)
        assert select_a == data_a
        select_b = self.select(self.cmpneq(self.zero(), self.zero()), vdata_a, vdata_b)
        assert select_b == data_b

        # test extract elements
        assert self.extract0(vdata_b) == vdata_b[0]

        # the cleanup intrinsic is only used with AVX for
        # zeroing registers to avoid the AVX-SSE transition penalty,
        # so there is nothing to test here
        self.npyv.cleanup()

    def test_reorder(self):
        data_a, data_b = self._data(), self._data(reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)
        # lower half part
        data_a_lo = data_a[:self.nlanes//2]
        data_b_lo = data_b[:self.nlanes//2]
        # higher half part
        data_a_hi = data_a[self.nlanes//2:]
        data_b_hi = data_b[self.nlanes//2:]
        # combine two lower parts
        combinel = self.combinel(vdata_a, vdata_b)
        assert combinel == data_a_lo + data_b_lo
        # combine two higher parts
        combineh = self.combineh(vdata_a, vdata_b)
        assert combineh == data_a_hi + data_b_hi
        # combine x2
        combine = self.combine(vdata_a, vdata_b)
        assert combine == (data_a_lo + data_b_lo, data_a_hi + data_b_hi)
        # zip (interleave)
        data_zipl = [v for p in zip(data_a_lo, data_b_lo) for v in p]
        data_ziph = [v for p in zip(data_a_hi, data_b_hi) for v in p]
        vzip = self.zip(vdata_a, vdata_b)
        assert vzip == (data_zipl, data_ziph)

    def test_reorder_rev64(self):
        # Reverse elements of each 64-bit lane
        ssize = self._scalar_size()
        if ssize == 64:
            return
        data_rev64 = [
            y for x in range(0, self.nlanes, 64//ssize)
              for y in reversed(range(x, x + 64//ssize))
        ]
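        # e.g. for 32-bit lanes (two elements per 64-bit lane), the expected
        # pattern is [1, 0, 3, 2, ...]: each pair of indices swapped in place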
        rev64 = self.rev64(self.load(range(self.nlanes)))
        assert rev64 == data_rev64

    @pytest.mark.parametrize('func, intrin', [
        (operator.lt, "cmplt"),
        (operator.le, "cmple"),
        (operator.gt, "cmpgt"),
        (operator.ge, "cmpge"),
        (operator.eq, "cmpeq")
    ])
    def test_operators_comparison(self, func, intrin):
        if self._is_fp():
            data_a = self._data()
        else:
            data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)
        intrin = getattr(self, intrin)

        mask_true = self._true_mask()
        def to_bool(vector):
            return [lane == mask_true for lane in vector]

        data_cmp = [func(a, b) for a, b in zip(data_a, data_b)]
        cmp = to_bool(intrin(vdata_a, vdata_b))
        assert cmp == data_cmp

    def test_operators_logical(self):
        if self._is_fp():
            data_a = self._data()
        else:
            data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        if self._is_fp():
            data_cast_a = self._to_unsigned(vdata_a)
            data_cast_b = self._to_unsigned(vdata_b)
            cast, cast_data = self._to_unsigned, self._to_unsigned
        else:
            data_cast_a, data_cast_b = data_a, data_b
            cast, cast_data = lambda a: a, self.load

        data_xor = cast_data([a ^ b for a, b in zip(data_cast_a, data_cast_b)])
        vxor = cast(self.xor(vdata_a, vdata_b))
        assert vxor == data_xor

        data_or = cast_data([a | b for a, b in zip(data_cast_a, data_cast_b)])
        vor = cast(getattr(self, "or")(vdata_a, vdata_b))
        assert vor == data_or

        data_and = cast_data([a & b for a, b in zip(data_cast_a, data_cast_b)])
        vand = cast(getattr(self, "and")(vdata_a, vdata_b))
        assert vand == data_and

        data_not = cast_data([~a for a in data_cast_a])
        vnot = cast(getattr(self, "not")(vdata_a))
        assert vnot == data_not

        # andc is tested only for u8 here
        if self.sfx not in ("u8",):
            return
        data_andc = [a & ~b for a, b in zip(data_cast_a, data_cast_b)]
        vandc = cast(getattr(self, "andc")(vdata_a, vdata_b))
        assert vandc == data_andc

    @pytest.mark.parametrize("intrin", ["any", "all"])
    @pytest.mark.parametrize("data", (
        [1, 2, 3, 4],
        [-1, -2, -3, -4],
        [0, 1, 2, 3, 4],
        [0x7f, 0x7fff, 0x7fffffff, 0x7fffffffffffffff],
        [0, -1, -2, -3, 4],
        [0],
        [1],
        [-1]
    ))
    def test_operators_crosstest(self, intrin, data):
        """
        Test intrinsics:
            npyv_any_##SFX
            npyv_all_##SFX
        """
        data_a = self.load(data * self.nlanes)
        func = eval(intrin)
        intrin = getattr(self, intrin)
        desired = func(data_a)
        simd = intrin(data_a)
        assert not not simd == desired

    def test_conversion_boolean(self):
        bsfx = "b" + self.sfx[1:]
        to_boolean = getattr(self.npyv, "cvt_%s_%s" % (bsfx, self.sfx))
        from_boolean = getattr(self.npyv, "cvt_%s_%s" % (self.sfx, bsfx))

        false_vb = to_boolean(self.setall(0))
        true_vb = self.cmpeq(self.setall(0), self.setall(0))
        assert false_vb != true_vb

        false_vsfx = from_boolean(false_vb)
        true_vsfx = from_boolean(true_vb)
        assert false_vsfx != true_vsfx

    def test_conversion_expand(self):
        """
        Test expand intrinsics:
            npyv_expand_u16_u8
            npyv_expand_u32_u16
        """
        if self.sfx not in ("u8", "u16"):
            return
        totype = self.sfx[0]+str(int(self.sfx[1:])*2)
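        # e.g. "u8" expands to "u16", and "u16" to "u32"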
        expand = getattr(self.npyv, f"expand_{totype}_{self.sfx}")
        # close enough to the edge to detect any deviation
        data = self._data(self._int_max() - self.nlanes)
        vdata = self.load(data)
        edata = expand(vdata)
        # lower half part
        data_lo = data[:self.nlanes//2]
        # higher half part
        data_hi = data[self.nlanes//2:]
        assert edata == (data_lo, data_hi)

    def test_arithmetic_subadd(self):
        if self._is_fp():
            data_a = self._data()
        else:
            data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        # non-saturated
        data_add = self.load([a + b for a, b in zip(data_a, data_b)])  # load to cast
        add = self.add(vdata_a, vdata_b)
        assert add == data_add
        data_sub = self.load([a - b for a, b in zip(data_a, data_b)])
        sub = self.sub(vdata_a, vdata_b)
        assert sub == data_sub

    def test_arithmetic_mul(self):
        if self.sfx in ("u64", "s64"):
            return
        if self._is_fp():
            data_a = self._data()
        else:
            data_a = self._data(self._int_max() - self.nlanes)
        data_b = self._data(self._int_min(), reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        data_mul = self.load([a * b for a, b in zip(data_a, data_b)])
        mul = self.mul(vdata_a, vdata_b)
        assert mul == data_mul

    def test_arithmetic_div(self):
        if not self._is_fp():
            return
        data_a, data_b = self._data(), self._data(reverse=True)
        vdata_a, vdata_b = self.load(data_a), self.load(data_b)

        # load to truncate f64 to the precision of f32
        data_div = self.load([a / b for a, b in zip(data_a, data_b)])
        div = self.div(vdata_a, vdata_b)
        assert div == data_div

    def test_arithmetic_intdiv(self):
        """
        Test integer division intrinsics:
            npyv_divisor_##sfx
            npyv_divc_##sfx
        """
        if self._is_fp():
            return

        int_min = self._int_min()
        def trunc_div(a, d):
            """
            Divide towards zero; works with large integers > 2^53,
            and wraps around on overflow similar to what C does.
            """
            if d == -1 and a == int_min:
                return a
            sign_a, sign_d = a < 0, d < 0
            if a == 0 or sign_a == sign_d:
                return a // d
            return (a + sign_d - sign_a) // d + 1
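        # e.g. trunc_div(-7, 2) == -3 (C-style truncation), whereas Python's
        # floor division gives -7 // 2 == -4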
        data = [1, -int_min]  # to test overflow
        data += range(0, 2**8, 2**5)
        data += range(0, 2**8, 2**5-1)
        bsize = self._scalar_size()
        if bsize > 8:
            data += range(2**8, 2**16, 2**13)
            data += range(2**8, 2**16, 2**13-1)
        if bsize > 16:
            data += range(2**16, 2**32, 2**29)
            data += range(2**16, 2**32, 2**29-1)
        if bsize > 32:
            data += range(2**32, 2**64, 2**61)
            data += range(2**32, 2**64, 2**61-1)
        # negate
        data += [-x for x in data]
        for dividend, divisor in itertools.product(data, data):
            divisor = self.setall(divisor)[0]  # cast
            if divisor == 0:
                continue
            dividend = self.load(self._data(dividend))
            data_divc = [trunc_div(a, divisor) for a in dividend]
            divisor_parms = self.divisor(divisor)
            divc = self.divc(dividend, divisor_parms)
            assert divc == data_divc

    def test_arithmetic_reduce_sum(self):
        """
        Test reduce sum intrinsics:
            npyv_sum_##sfx
        """
        if self.sfx not in ("u32", "u64", "f32", "f64"):
            return
        # reduce sum
        data = self._data()
        vdata = self.load(data)
        data_sum = sum(data)
        vsum = self.sum(vdata)
        assert vsum == data_sum

    def test_arithmetic_reduce_sumup(self):
        """
        Test extended reduce sum intrinsics:
            npyv_sumup_##sfx
        """
        if self.sfx not in ("u8", "u16"):
            return
        rdata = (0, self.nlanes, self._int_min(), self._int_max()-self.nlanes)
        for r in rdata:
            data = self._data(r)
            vdata = self.load(data)
            data_sum = sum(data)
            vsum = self.sumup(vdata)
            assert vsum == data_sum

    def test_mask_conditional(self):
        """
        Conditional addition and subtraction for all supported data types.
        Test intrinsics:
            npyv_ifadd_##SFX, npyv_ifsub_##SFX
        """
        vdata_a = self.load(self._data())
        vdata_b = self.load(self._data(reverse=True))
        true_mask = self.cmpeq(self.zero(), self.zero())
        false_mask = self.cmpneq(self.zero(), self.zero())

        data_sub = self.sub(vdata_b, vdata_a)
        ifsub = self.ifsub(true_mask, vdata_b, vdata_a, vdata_b)
        assert ifsub == data_sub
        ifsub = self.ifsub(false_mask, vdata_a, vdata_b, vdata_b)
        assert ifsub == vdata_b

        data_add = self.add(vdata_b, vdata_a)
        ifadd = self.ifadd(true_mask, vdata_b, vdata_a, vdata_b)
        assert ifadd == data_add
        ifadd = self.ifadd(false_mask, vdata_a, vdata_b, vdata_b)
        assert ifadd == vdata_b
  1037. bool_sfx = ("b8", "b16", "b32", "b64")
  1038. int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64")
  1039. fp_sfx = ("f32", "f64")
  1040. all_sfx = int_sfx + fp_sfx
  1041. tests_registry = {
  1042. bool_sfx: _SIMD_BOOL,
  1043. int_sfx : _SIMD_INT,
  1044. fp_sfx : _SIMD_FP,
  1045. ("f32",): _SIMD_FP32,
  1046. ("f64",): _SIMD_FP64,
  1047. all_sfx : _SIMD_ALL
  1048. }
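# Each registry entry is expanded below into concrete test classes, one per
# (target, suffix) pair; e.g. a generated class name might look like
# "Test_SIMD_ALL_128_baseline_f32" (width and target depend on the machine).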
for target_name, npyv in targets.items():
    simd_width = npyv.simd if npyv else ''
    pretty_name = target_name.split('__')  # multi-target separator
    if len(pretty_name) > 1:
        # multi-target
        pretty_name = f"({' '.join(pretty_name)})"
    else:
        pretty_name = pretty_name[0]

    skip = ""
    skip_sfx = dict()
    if not npyv:
        skip = f"target '{pretty_name}' isn't supported by current machine"
    elif not npyv.simd:
        skip = f"target '{pretty_name}' isn't supported by NPYV"
    else:
        if not npyv.simd_f32:
            skip_sfx["f32"] = f"target '{pretty_name}' " \
                               "doesn't support single-precision"
        if not npyv.simd_f64:
            skip_sfx["f64"] = f"target '{pretty_name}' " \
                               "doesn't support double-precision"

    for sfxes, cls in tests_registry.items():
        for sfx in sfxes:
            skip_m = skip_sfx.get(sfx, skip)
            inhr = (cls,)
            attr = dict(npyv=targets[target_name], sfx=sfx, target_name=target_name)
            tcls = type(f"Test{cls.__name__}_{simd_width}_{target_name}_{sfx}", inhr, attr)
            if skip_m:
                pytest.mark.skip(reason=skip_m)(tcls)
            globals()[tcls.__name__] = tcls