# test_ccompiler_opt.py
  1. import re, textwrap, os
  2. from os import sys, path
  3. from distutils.errors import DistutilsError
  4. is_standalone = __name__ == '__main__' and __package__ is None
  5. if is_standalone:
  6. import unittest, contextlib, tempfile, shutil
  7. sys.path.append(path.abspath(path.join(path.dirname(__file__), "..")))
  8. from ccompiler_opt import CCompilerOpt
  9. # from numpy/testing/_private/utils.py
  10. @contextlib.contextmanager
  11. def tempdir(*args, **kwargs):
  12. tmpdir = tempfile.mkdtemp(*args, **kwargs)
  13. try:
  14. yield tmpdir
  15. finally:
  16. shutil.rmtree(tmpdir)
  17. def assert_(expr, msg=''):
  18. if not expr:
  19. raise AssertionError(msg)
  20. else:
  21. from numpy.distutils.ccompiler_opt import CCompilerOpt
  22. from numpy.testing import assert_, tempdir
  23. # architectures and compilers to test
  24. arch_compilers = dict(
  25. x86 = ("gcc", "clang", "icc", "iccw", "msvc"),
  26. x64 = ("gcc", "clang", "icc", "iccw", "msvc"),
  27. ppc64 = ("gcc", "clang"),
  28. ppc64le = ("gcc", "clang"),
  29. armhf = ("gcc", "clang"),
  30. aarch64 = ("gcc", "clang"),
  31. s390x = ("gcc", "clang"),
  32. noarch = ("gcc",)
  33. )
  34. class FakeCCompilerOpt(CCompilerOpt):
  35. fake_info = ""
  36. def __init__(self, trap_files="", trap_flags="", *args, **kwargs):
  37. self.fake_trap_files = trap_files
  38. self.fake_trap_flags = trap_flags
  39. CCompilerOpt.__init__(self, None, **kwargs)
  40. def __repr__(self):
  41. return textwrap.dedent("""\
  42. <<<<
  43. march : {}
  44. compiler : {}
  45. ----------------
  46. {}
  47. >>>>
  48. """).format(self.cc_march, self.cc_name, self.report())
  49. def dist_compile(self, sources, flags, **kwargs):
  50. assert(isinstance(sources, list))
  51. assert(isinstance(flags, list))
  52. if self.fake_trap_files:
  53. for src in sources:
  54. if re.match(self.fake_trap_files, src):
  55. self.dist_error("source is trapped by a fake interface")
  56. if self.fake_trap_flags:
  57. for f in flags:
  58. if re.match(self.fake_trap_flags, f):
  59. self.dist_error("flag is trapped by a fake interface")
  60. # fake objects
  61. return zip(sources, [' '.join(flags)] * len(sources))
  62. def dist_info(self):
  63. return FakeCCompilerOpt.fake_info
  64. @staticmethod
  65. def dist_log(*args, stderr=False):
  66. pass
  67. class _Test_CCompilerOpt:
  68. arch = None # x86_64
  69. cc = None # gcc
  70. def setup_class(self):
  71. FakeCCompilerOpt.conf_nocache = True
  72. self._opt = None
  73. def nopt(self, *args, **kwargs):
  74. FakeCCompilerOpt.fake_info = (self.arch, self.cc, "")
  75. return FakeCCompilerOpt(*args, **kwargs)
  76. def opt(self):
  77. if not self._opt:
  78. self._opt = self.nopt()
  79. return self._opt
  80. def march(self):
  81. return self.opt().cc_march
  82. def cc_name(self):
  83. return self.opt().cc_name
  84. def get_targets(self, targets, groups, **kwargs):
  85. FakeCCompilerOpt.conf_target_groups = groups
  86. opt = self.nopt(
  87. cpu_baseline=kwargs.get("baseline", "min"),
  88. cpu_dispatch=kwargs.get("dispatch", "max"),
  89. trap_files=kwargs.get("trap_files", ""),
  90. trap_flags=kwargs.get("trap_flags", "")
  91. )
  92. with tempdir() as tmpdir:
  93. file = os.path.join(tmpdir, "test_targets.c")
  94. with open(file, 'w') as f:
  95. f.write(targets)
  96. gtargets = []
  97. gflags = {}
  98. fake_objects = opt.try_dispatch([file])
  99. for source, flags in fake_objects:
  100. gtar = path.basename(source).split('.')[1:-1]
  101. glen = len(gtar)
  102. if glen == 0:
  103. gtar = "baseline"
  104. elif glen == 1:
  105. gtar = gtar[0].upper()
  106. else:
  107. # converting multi-target into parentheses str format to be equivalent
  108. # to the configuration statements syntax.
  109. gtar = ('('+' '.join(gtar)+')').upper()
  110. gtargets.append(gtar)
  111. gflags[gtar] = flags
  112. has_baseline, targets = opt.sources_status[file]
  113. targets = targets + ["baseline"] if has_baseline else targets
  114. # convert tuple that represent multi-target into parentheses str format
  115. targets = [
  116. '('+' '.join(tar)+')' if isinstance(tar, tuple) else tar
  117. for tar in targets
  118. ]
  119. if len(targets) != len(gtargets) or not all(t in gtargets for t in targets):
  120. raise AssertionError(
  121. "'sources_status' returns different targets than the compiled targets\n"
  122. "%s != %s" % (targets, gtargets)
  123. )
  124. # return targets from 'sources_status' since the order is matters
  125. return targets, gflags
  126. def arg_regex(self, **kwargs):
  127. map2origin = dict(
  128. x64 = "x86",
  129. ppc64le = "ppc64",
  130. aarch64 = "armhf",
  131. clang = "gcc",
  132. )
  133. march = self.march(); cc_name = self.cc_name()
  134. map_march = map2origin.get(march, march)
  135. map_cc = map2origin.get(cc_name, cc_name)
  136. for key in (
  137. march, cc_name, map_march, map_cc,
  138. march + '_' + cc_name,
  139. map_march + '_' + cc_name,
  140. march + '_' + map_cc,
  141. map_march + '_' + map_cc,
  142. ) :
  143. regex = kwargs.pop(key, None)
  144. if regex is not None:
  145. break
  146. if regex:
  147. if isinstance(regex, dict):
  148. for k, v in regex.items():
  149. if v[-1:] not in ')}$?\\.+*':
  150. regex[k] = v + '$'
  151. else:
  152. assert(isinstance(regex, str))
  153. if regex[-1:] not in ')}$?\\.+*':
  154. regex += '$'
  155. return regex
  156. def expect(self, dispatch, baseline="", **kwargs):
  157. match = self.arg_regex(**kwargs)
  158. if match is None:
  159. return
  160. opt = self.nopt(
  161. cpu_baseline=baseline, cpu_dispatch=dispatch,
  162. trap_files=kwargs.get("trap_files", ""),
  163. trap_flags=kwargs.get("trap_flags", "")
  164. )
  165. features = ' '.join(opt.cpu_dispatch_names())
  166. if not match:
  167. if len(features) != 0:
  168. raise AssertionError(
  169. 'expected empty features, not "%s"' % features
  170. )
  171. return
  172. if not re.match(match, features, re.IGNORECASE):
  173. raise AssertionError(
  174. 'dispatch features "%s" not match "%s"' % (features, match)
  175. )
  176. def expect_baseline(self, baseline, dispatch="", **kwargs):
  177. match = self.arg_regex(**kwargs)
  178. if match is None:
  179. return
  180. opt = self.nopt(
  181. cpu_baseline=baseline, cpu_dispatch=dispatch,
  182. trap_files=kwargs.get("trap_files", ""),
  183. trap_flags=kwargs.get("trap_flags", "")
  184. )
  185. features = ' '.join(opt.cpu_baseline_names())
  186. if not match:
  187. if len(features) != 0:
  188. raise AssertionError(
  189. 'expected empty features, not "%s"' % features
  190. )
  191. return
  192. if not re.match(match, features, re.IGNORECASE):
  193. raise AssertionError(
  194. 'baseline features "%s" not match "%s"' % (features, match)
  195. )
  196. def expect_flags(self, baseline, dispatch="", **kwargs):
  197. match = self.arg_regex(**kwargs)
  198. if match is None:
  199. return
  200. opt = self.nopt(
  201. cpu_baseline=baseline, cpu_dispatch=dispatch,
  202. trap_files=kwargs.get("trap_files", ""),
  203. trap_flags=kwargs.get("trap_flags", "")
  204. )
  205. flags = ' '.join(opt.cpu_baseline_flags())
  206. if not match:
  207. if len(flags) != 0:
  208. raise AssertionError(
  209. 'expected empty flags not "%s"' % flags
  210. )
  211. return
  212. if not re.match(match, flags):
  213. raise AssertionError(
  214. 'flags "%s" not match "%s"' % (flags, match)
  215. )
  216. def expect_targets(self, targets, groups={}, **kwargs):
  217. match = self.arg_regex(**kwargs)
  218. if match is None:
  219. return
  220. targets, _ = self.get_targets(targets=targets, groups=groups, **kwargs)
  221. targets = ' '.join(targets)
  222. if not match:
  223. if len(targets) != 0:
  224. raise AssertionError(
  225. 'expected empty targets, not "%s"' % targets
  226. )
  227. return
  228. if not re.match(match, targets, re.IGNORECASE):
  229. raise AssertionError(
  230. 'targets "%s" not match "%s"' % (targets, match)
  231. )
  232. def expect_target_flags(self, targets, groups={}, **kwargs):
  233. match_dict = self.arg_regex(**kwargs)
  234. if match_dict is None:
  235. return
  236. assert(isinstance(match_dict, dict))
  237. _, tar_flags = self.get_targets(targets=targets, groups=groups)
  238. for match_tar, match_flags in match_dict.items():
  239. if match_tar not in tar_flags:
  240. raise AssertionError(
  241. 'expected to find target "%s"' % match_tar
  242. )
  243. flags = tar_flags[match_tar]
  244. if not match_flags:
  245. if len(flags) != 0:
  246. raise AssertionError(
  247. 'expected to find empty flags in target "%s"' % match_tar
  248. )
  249. if not re.match(match_flags, flags):
  250. raise AssertionError(
  251. '"%s" flags "%s" not match "%s"' % (match_tar, flags, match_flags)
  252. )
  253. def test_interface(self):
  254. wrong_arch = "ppc64" if self.arch != "ppc64" else "x86"
  255. wrong_cc = "clang" if self.cc != "clang" else "icc"
  256. opt = self.opt()
  257. assert_(getattr(opt, "cc_on_" + self.arch))
  258. assert_(not getattr(opt, "cc_on_" + wrong_arch))
  259. assert_(getattr(opt, "cc_is_" + self.cc))
  260. assert_(not getattr(opt, "cc_is_" + wrong_cc))
  261. def test_args_empty(self):
  262. for baseline, dispatch in (
  263. ("", "none"),
  264. (None, ""),
  265. ("none +none", "none - none"),
  266. ("none -max", "min - max"),
  267. ("+vsx2 -VSX2", "vsx avx2 avx512f -max"),
  268. ("max -vsx - avx + avx512f neon -MAX ",
  269. "min -min + max -max -vsx + avx2 -avx2 +NONE")
  270. ) :
  271. opt = self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch)
  272. assert(len(opt.cpu_baseline_names()) == 0)
  273. assert(len(opt.cpu_dispatch_names()) == 0)
  274. def test_args_validation(self):
  275. if self.march() == "unknown":
  276. return
  277. # check sanity of argument's validation
  278. for baseline, dispatch in (
  279. ("unkown_feature - max +min", "unknown max min"), # unknowing features
  280. ("#avx2", "$vsx") # groups and polices aren't acceptable
  281. ) :
  282. try:
  283. self.nopt(cpu_baseline=baseline, cpu_dispatch=dispatch)
  284. raise AssertionError("excepted an exception for invalid arguments")
  285. except DistutilsError:
  286. pass
  287. def test_skip(self):
  288. # only takes what platform supports and skip the others
  289. # without casing exceptions
  290. self.expect(
  291. "sse vsx neon",
  292. x86="sse", ppc64="vsx", armhf="neon", unknown=""
  293. )
  294. self.expect(
  295. "sse41 avx avx2 vsx2 vsx3 neon_vfpv4 asimd",
  296. x86 = "sse41 avx avx2",
  297. ppc64 = "vsx2 vsx3",
  298. armhf = "neon_vfpv4 asimd",
  299. unknown = ""
  300. )
  301. # any features in cpu_dispatch must be ignored if it's part of baseline
  302. self.expect(
  303. "sse neon vsx", baseline="sse neon vsx",
  304. x86="", ppc64="", armhf=""
  305. )
  306. self.expect(
  307. "avx2 vsx3 asimdhp", baseline="avx2 vsx3 asimdhp",
  308. x86="", ppc64="", armhf=""
  309. )
  310. def test_implies(self):
  311. # baseline combining implied features, so we count
  312. # on it instead of testing 'feature_implies()'' directly
  313. self.expect_baseline(
  314. "fma3 avx2 asimd vsx3",
  315. # .* between two spaces can validate features in between
  316. x86 = "sse .* sse41 .* fma3.*avx2",
  317. ppc64 = "vsx vsx2 vsx3",
  318. armhf = "neon neon_fp16 neon_vfpv4 asimd"
  319. )
  320. """
  321. special cases
  322. """
  323. # in icc and msvc, FMA3 and AVX2 can't be separated
  324. # both need to implies each other, same for avx512f & cd
  325. for f0, f1 in (
  326. ("fma3", "avx2"),
  327. ("avx512f", "avx512cd"),
  328. ):
  329. diff = ".* sse42 .* %s .*%s$" % (f0, f1)
  330. self.expect_baseline(f0,
  331. x86_gcc=".* sse42 .* %s$" % f0,
  332. x86_icc=diff, x86_iccw=diff
  333. )
  334. self.expect_baseline(f1,
  335. x86_gcc=".* avx .* %s$" % f1,
  336. x86_icc=diff, x86_iccw=diff
  337. )
  338. # in msvc, following features can't be separated too
  339. for f in (("fma3", "avx2"), ("avx512f", "avx512cd", "avx512_skx")):
  340. for ff in f:
  341. self.expect_baseline(ff,
  342. x86_msvc=".*%s" % ' '.join(f)
  343. )
  344. # in ppc64le VSX and VSX2 can't be separated
  345. self.expect_baseline("vsx", ppc64le="vsx vsx2")
  346. # in aarch64 following features can't be separated
  347. for f in ("neon", "neon_fp16", "neon_vfpv4", "asimd"):
  348. self.expect_baseline(f, aarch64="neon neon_fp16 neon_vfpv4 asimd")
  349. def test_args_options(self):
  350. # max & native
  351. for o in ("max", "native"):
  352. if o == "native" and self.cc_name() == "msvc":
  353. continue
  354. self.expect(o,
  355. trap_files=".*cpu_(sse|vsx|neon|vx).c",
  356. x86="", ppc64="", armhf="", s390x=""
  357. )
  358. self.expect(o,
  359. trap_files=".*cpu_(sse3|vsx2|neon_vfpv4|vxe).c",
  360. x86="sse sse2", ppc64="vsx", armhf="neon neon_fp16",
  361. aarch64="", ppc64le="", s390x="vx"
  362. )
  363. self.expect(o,
  364. trap_files=".*cpu_(popcnt|vsx3).c",
  365. x86="sse .* sse41", ppc64="vsx vsx2",
  366. armhf="neon neon_fp16 .* asimd .*",
  367. s390x="vx vxe vxe2"
  368. )
  369. self.expect(o,
  370. x86_gcc=".* xop fma4 .* avx512f .* avx512_knl avx512_knm avx512_skx .*",
  371. # in icc, xop and fam4 aren't supported
  372. x86_icc=".* avx512f .* avx512_knl avx512_knm avx512_skx .*",
  373. x86_iccw=".* avx512f .* avx512_knl avx512_knm avx512_skx .*",
  374. # in msvc, avx512_knl avx512_knm aren't supported
  375. x86_msvc=".* xop fma4 .* avx512f .* avx512_skx .*",
  376. armhf=".* asimd asimdhp asimddp .*",
  377. ppc64="vsx vsx2 vsx3 vsx4.*",
  378. s390x="vx vxe vxe2.*"
  379. )
  380. # min
  381. self.expect("min",
  382. x86="sse sse2", x64="sse sse2 sse3",
  383. armhf="", aarch64="neon neon_fp16 .* asimd",
  384. ppc64="", ppc64le="vsx vsx2", s390x=""
  385. )
  386. self.expect(
  387. "min", trap_files=".*cpu_(sse2|vsx2).c",
  388. x86="", ppc64le=""
  389. )
  390. # an exception must triggered if native flag isn't supported
  391. # when option "native" is activated through the args
  392. try:
  393. self.expect("native",
  394. trap_flags=".*(-march=native|-xHost|/QxHost).*",
  395. x86=".*", ppc64=".*", armhf=".*", s390x=".*"
  396. )
  397. if self.march() != "unknown":
  398. raise AssertionError(
  399. "excepted an exception for %s" % self.march()
  400. )
  401. except DistutilsError:
  402. if self.march() == "unknown":
  403. raise AssertionError("excepted no exceptions")
  404. def test_flags(self):
  405. self.expect_flags(
  406. "sse sse2 vsx vsx2 neon neon_fp16 vx vxe",
  407. x86_gcc="-msse -msse2", x86_icc="-msse -msse2",
  408. x86_iccw="/arch:SSE2",
  409. x86_msvc="/arch:SSE2" if self.march() == "x86" else "",
  410. ppc64_gcc= "-mcpu=power8",
  411. ppc64_clang="-maltivec -mvsx -mpower8-vector",
  412. armhf_gcc="-mfpu=neon-fp16 -mfp16-format=ieee",
  413. aarch64="",
  414. s390x="-mzvector -march=arch12"
  415. )
  416. # testing normalize -march
  417. self.expect_flags(
  418. "asimd",
  419. aarch64="",
  420. armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8-a\+simd"
  421. )
  422. self.expect_flags(
  423. "asimdhp",
  424. aarch64_gcc=r"-march=armv8.2-a\+fp16",
  425. armhf_gcc=r"-mfp16-format=ieee -mfpu=neon-fp-armv8 -march=armv8.2-a\+fp16"
  426. )
  427. self.expect_flags(
  428. "asimddp", aarch64_gcc=r"-march=armv8.2-a\+dotprod"
  429. )
  430. self.expect_flags(
  431. # asimdfhm implies asimdhp
  432. "asimdfhm", aarch64_gcc=r"-march=armv8.2-a\+fp16\+fp16fml"
  433. )
  434. self.expect_flags(
  435. "asimddp asimdhp asimdfhm",
  436. aarch64_gcc=r"-march=armv8.2-a\+dotprod\+fp16\+fp16fml"
  437. )
  438. self.expect_flags(
  439. "vx vxe vxe2",
  440. s390x=r"-mzvector -march=arch13"
  441. )
  442. def test_targets_exceptions(self):
  443. for targets in (
  444. "bla bla", "/*@targets",
  445. "/*@targets */",
  446. "/*@targets unknown */",
  447. "/*@targets $unknown_policy avx2 */",
  448. "/*@targets #unknown_group avx2 */",
  449. "/*@targets $ */",
  450. "/*@targets # vsx */",
  451. "/*@targets #$ vsx */",
  452. "/*@targets vsx avx2 ) */",
  453. "/*@targets vsx avx2 (avx2 */",
  454. "/*@targets vsx avx2 () */",
  455. "/*@targets vsx avx2 ($autovec) */", # no features
  456. "/*@targets vsx avx2 (xxx) */",
  457. "/*@targets vsx avx2 (baseline) */",
  458. ) :
  459. try:
  460. self.expect_targets(
  461. targets,
  462. x86="", armhf="", ppc64="", s390x=""
  463. )
  464. if self.march() != "unknown":
  465. raise AssertionError(
  466. "excepted an exception for %s" % self.march()
  467. )
  468. except DistutilsError:
  469. if self.march() == "unknown":
  470. raise AssertionError("excepted no exceptions")
  471. def test_targets_syntax(self):
  472. for targets in (
  473. "/*@targets $keep_baseline sse vsx neon vx*/",
  474. "/*@targets,$keep_baseline,sse,vsx,neon vx*/",
  475. "/*@targets*$keep_baseline*sse*vsx*neon*vx*/",
  476. """
  477. /*
  478. ** @targets
  479. ** $keep_baseline, sse vsx,neon, vx
  480. */
  481. """,
  482. """
  483. /*
  484. ************@targets****************
  485. ** $keep_baseline, sse vsx, neon, vx
  486. ************************************
  487. */
  488. """,
  489. """
  490. /*
  491. /////////////@targets/////////////////
  492. //$keep_baseline//sse//vsx//neon//vx
  493. /////////////////////////////////////
  494. */
  495. """,
  496. """
  497. /*
  498. @targets
  499. $keep_baseline
  500. SSE VSX NEON VX*/
  501. """
  502. ) :
  503. self.expect_targets(targets,
  504. x86="sse", ppc64="vsx", armhf="neon", s390x="vx", unknown=""
  505. )
  506. def test_targets(self):
  507. # test skipping baseline features
  508. self.expect_targets(
  509. """
  510. /*@targets
  511. sse sse2 sse41 avx avx2 avx512f
  512. vsx vsx2 vsx3 vsx4
  513. neon neon_fp16 asimdhp asimddp
  514. vx vxe vxe2
  515. */
  516. """,
  517. baseline="avx vsx2 asimd vx vxe",
  518. x86="avx512f avx2", armhf="asimddp asimdhp", ppc64="vsx4 vsx3",
  519. s390x="vxe2"
  520. )
  521. # test skipping non-dispatch features
  522. self.expect_targets(
  523. """
  524. /*@targets
  525. sse41 avx avx2 avx512f
  526. vsx2 vsx3 vsx4
  527. asimd asimdhp asimddp
  528. vx vxe vxe2
  529. */
  530. """,
  531. baseline="", dispatch="sse41 avx2 vsx2 asimd asimddp vxe2",
  532. x86="avx2 sse41", armhf="asimddp asimd", ppc64="vsx2", s390x="vxe2"
  533. )
  534. # test skipping features that not supported
  535. self.expect_targets(
  536. """
  537. /*@targets
  538. sse2 sse41 avx2 avx512f
  539. vsx2 vsx3 vsx4
  540. neon asimdhp asimddp
  541. vx vxe vxe2
  542. */
  543. """,
  544. baseline="",
  545. trap_files=".*(avx2|avx512f|vsx3|vsx4|asimddp|vxe2).c",
  546. x86="sse41 sse2", ppc64="vsx2", armhf="asimdhp neon",
  547. s390x="vxe vx"
  548. )
  549. # test skipping features that implies each other
  550. self.expect_targets(
  551. """
  552. /*@targets
  553. sse sse2 avx fma3 avx2 avx512f avx512cd
  554. vsx vsx2 vsx3
  555. neon neon_vfpv4 neon_fp16 neon_fp16 asimd asimdhp
  556. asimddp asimdfhm
  557. */
  558. """,
  559. baseline="",
  560. x86_gcc="avx512cd avx512f avx2 fma3 avx sse2",
  561. x86_msvc="avx512cd avx2 avx sse2",
  562. x86_icc="avx512cd avx2 avx sse2",
  563. x86_iccw="avx512cd avx2 avx sse2",
  564. ppc64="vsx3 vsx2 vsx",
  565. ppc64le="vsx3 vsx2",
  566. armhf="asimdfhm asimddp asimdhp asimd neon_vfpv4 neon_fp16 neon",
  567. aarch64="asimdfhm asimddp asimdhp asimd"
  568. )
  569. def test_targets_policies(self):
  570. # 'keep_baseline', generate objects for baseline features
  571. self.expect_targets(
  572. """
  573. /*@targets
  574. $keep_baseline
  575. sse2 sse42 avx2 avx512f
  576. vsx2 vsx3
  577. neon neon_vfpv4 asimd asimddp
  578. vx vxe vxe2
  579. */
  580. """,
  581. baseline="sse41 avx2 vsx2 asimd vsx3 vxe",
  582. x86="avx512f avx2 sse42 sse2",
  583. ppc64="vsx3 vsx2",
  584. armhf="asimddp asimd neon_vfpv4 neon",
  585. # neon, neon_vfpv4, asimd implies each other
  586. aarch64="asimddp asimd",
  587. s390x="vxe2 vxe vx"
  588. )
  589. # 'keep_sort', leave the sort as-is
  590. self.expect_targets(
  591. """
  592. /*@targets
  593. $keep_baseline $keep_sort
  594. avx512f sse42 avx2 sse2
  595. vsx2 vsx3
  596. asimd neon neon_vfpv4 asimddp
  597. vxe vxe2
  598. */
  599. """,
  600. x86="avx512f sse42 avx2 sse2",
  601. ppc64="vsx2 vsx3",
  602. armhf="asimd neon neon_vfpv4 asimddp",
  603. # neon, neon_vfpv4, asimd implies each other
  604. aarch64="asimd asimddp",
  605. s390x="vxe vxe2"
  606. )
  607. # 'autovec', skipping features that can't be
  608. # vectorized by the compiler
  609. self.expect_targets(
  610. """
  611. /*@targets
  612. $keep_baseline $keep_sort $autovec
  613. avx512f avx2 sse42 sse41 sse2
  614. vsx3 vsx2
  615. asimddp asimd neon_vfpv4 neon
  616. */
  617. """,
  618. x86_gcc="avx512f avx2 sse42 sse41 sse2",
  619. x86_icc="avx512f avx2 sse42 sse41 sse2",
  620. x86_iccw="avx512f avx2 sse42 sse41 sse2",
  621. x86_msvc="avx512f avx2 sse2"
  622. if self.march() == 'x86' else "avx512f avx2",
  623. ppc64="vsx3 vsx2",
  624. armhf="asimddp asimd neon_vfpv4 neon",
  625. # neon, neon_vfpv4, asimd implies each other
  626. aarch64="asimddp asimd"
  627. )
  628. for policy in ("$maxopt", "$autovec"):
  629. # 'maxopt' and autovec set the max acceptable optimization flags
  630. self.expect_target_flags(
  631. "/*@targets baseline %s */" % policy,
  632. gcc={"baseline":".*-O3.*"}, icc={"baseline":".*-O3.*"},
  633. iccw={"baseline":".*/O3.*"}, msvc={"baseline":".*/O2.*"},
  634. unknown={"baseline":".*"}
  635. )
  636. # 'werror', force compilers to treat warnings as errors
  637. self.expect_target_flags(
  638. "/*@targets baseline $werror */",
  639. gcc={"baseline":".*-Werror.*"}, icc={"baseline":".*-Werror.*"},
  640. iccw={"baseline":".*/Werror.*"}, msvc={"baseline":".*/WX.*"},
  641. unknown={"baseline":".*"}
  642. )
  643. def test_targets_groups(self):
  644. self.expect_targets(
  645. """
  646. /*@targets $keep_baseline baseline #test_group */
  647. """,
  648. groups=dict(
  649. test_group=("""
  650. $keep_baseline
  651. asimddp sse2 vsx2 avx2 vsx3
  652. avx512f asimdhp
  653. """)
  654. ),
  655. x86="avx512f avx2 sse2 baseline",
  656. ppc64="vsx3 vsx2 baseline",
  657. armhf="asimddp asimdhp baseline"
  658. )
  659. # test skip duplicating and sorting
  660. self.expect_targets(
  661. """
  662. /*@targets
  663. * sse42 avx avx512f
  664. * #test_group_1
  665. * vsx2
  666. * #test_group_2
  667. * asimddp asimdfhm
  668. */
  669. """,
  670. groups=dict(
  671. test_group_1=("""
  672. VSX2 vsx3 asimd avx2 SSE41
  673. """),
  674. test_group_2=("""
  675. vsx2 vsx3 asImd aVx2 sse41
  676. """)
  677. ),
  678. x86="avx512f avx2 avx sse42 sse41",
  679. ppc64="vsx3 vsx2",
  680. # vsx2 part of the default baseline of ppc64le, option ("min")
  681. ppc64le="vsx3",
  682. armhf="asimdfhm asimddp asimd",
  683. # asimd part of the default baseline of aarch64, option ("min")
  684. aarch64="asimdfhm asimddp"
  685. )
  686. def test_targets_multi(self):
  687. self.expect_targets(
  688. """
  689. /*@targets
  690. (avx512_clx avx512_cnl) (asimdhp asimddp)
  691. */
  692. """,
  693. x86=r"\(avx512_clx avx512_cnl\)",
  694. armhf=r"\(asimdhp asimddp\)",
  695. )
  696. # test skipping implied features and auto-sort
  697. self.expect_targets(
  698. """
  699. /*@targets
  700. f16c (sse41 avx sse42) (sse3 avx2 avx512f)
  701. vsx2 (vsx vsx3 vsx2)
  702. (neon neon_vfpv4 asimd asimdhp asimddp)
  703. */
  704. """,
  705. x86="avx512f f16c avx",
  706. ppc64="vsx3 vsx2",
  707. ppc64le="vsx3", # vsx2 part of baseline
  708. armhf=r"\(asimdhp asimddp\)",
  709. )
  710. # test skipping implied features and keep sort
  711. self.expect_targets(
  712. """
  713. /*@targets $keep_sort
  714. (sse41 avx sse42) (sse3 avx2 avx512f)
  715. (vsx vsx3 vsx2)
  716. (asimddp neon neon_vfpv4 asimd asimdhp)
  717. (vx vxe vxe2)
  718. */
  719. """,
  720. x86="avx avx512f",
  721. ppc64="vsx3",
  722. armhf=r"\(asimdhp asimddp\)",
  723. s390x="vxe2"
  724. )
  725. # test compiler variety and avoiding duplicating
  726. self.expect_targets(
  727. """
  728. /*@targets $keep_sort
  729. fma3 avx2 (fma3 avx2) (avx2 fma3) avx2 fma3
  730. */
  731. """,
  732. x86_gcc=r"fma3 avx2 \(fma3 avx2\)",
  733. x86_icc="avx2", x86_iccw="avx2",
  734. x86_msvc="avx2"
  735. )
  736. def new_test(arch, cc):
  737. if is_standalone: return textwrap.dedent("""\
  738. class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt, unittest.TestCase):
  739. arch = '{arch}'
  740. cc = '{cc}'
  741. def __init__(self, methodName="runTest"):
  742. unittest.TestCase.__init__(self, methodName)
  743. self.setup_class()
  744. """).format(
  745. class_name=arch + '_' + cc, arch=arch, cc=cc
  746. )
  747. return textwrap.dedent("""\
  748. class TestCCompilerOpt_{class_name}(_Test_CCompilerOpt):
  749. arch = '{arch}'
  750. cc = '{cc}'
  751. """).format(
  752. class_name=arch + '_' + cc, arch=arch, cc=cc
  753. )
  754. """
  755. if 1 and is_standalone:
  756. FakeCCompilerOpt.fake_info = "x86_icc"
  757. cco = FakeCCompilerOpt(None, cpu_baseline="avx2")
  758. print(' '.join(cco.cpu_baseline_names()))
  759. print(cco.cpu_baseline_flags())
  760. unittest.main()
  761. sys.exit()
  762. """
  763. for arch, compilers in arch_compilers.items():
  764. for cc in compilers:
  765. exec(new_test(arch, cc))
  766. if is_standalone:
  767. unittest.main()