setup_common.py

# Code common to build tools
import copy
import pathlib
import sys
import textwrap

from numpy.distutils.misc_util import mingw32

#-------------------
# Versioning support
#-------------------
# How to change C_API_VERSION?
#   - increase C_API_VERSION value
#   - record the hash for the new C API with the cversions.py script
#     and add the hash to cversions.txt
# The hash values are used to remind developers when the C API number was not
# updated - any mismatch raises a MismatchCAPIError during the build.

# Binary compatibility version number. This number is increased whenever the
# C-API is changed such that binary compatibility is broken, i.e. whenever a
# recompile of extension modules is needed.
C_ABI_VERSION = 0x01000009

# Minor API version. This number is increased whenever a change is made to the
# C-API -- whether it breaks binary compatibility or not. Some changes, such
# as adding a function pointer to the end of the function table, can be made
# without breaking binary compatibility. In this case, only the C_API_VERSION
# (*not* C_ABI_VERSION) would be increased. Whenever binary compatibility is
# broken, both C_API_VERSION and C_ABI_VERSION should be increased.
#
# The version needs to be kept in sync with that in cversions.txt.
#
# 0x00000008 - 1.7.x
# 0x00000009 - 1.8.x
# 0x00000009 - 1.9.x
# 0x0000000a - 1.10.x
# 0x0000000a - 1.11.x
# 0x0000000a - 1.12.x
# 0x0000000b - 1.13.x
# 0x0000000c - 1.14.x
# 0x0000000c - 1.15.x
# 0x0000000d - 1.16.x
# 0x0000000d - 1.19.x
# 0x0000000e - 1.20.x
# 0x0000000e - 1.21.x
# 0x0000000f - 1.22.x
# 0x00000010 - 1.23.x
# 0x00000010 - 1.24.x
C_API_VERSION = 0x00000010


class MismatchCAPIError(ValueError):
    pass


def get_api_versions(apiversion, codegen_dir):
    """
    Return current C API checksum and the recorded checksum.

    Return current C API checksum and the recorded checksum for the given
    version of the C API.
    """
    # Compute the hash of the current API as defined in the .txt files in
    # code_generators
    sys.path.insert(0, codegen_dir)
    try:
        m = __import__('genapi')
        numpy_api = __import__('numpy_api')
        curapi_hash = m.fullapi_hash(numpy_api.full_api)
        apis_hash = m.get_versions_hash()
    finally:
        del sys.path[0]

    return curapi_hash, apis_hash[apiversion]


def check_api_version(apiversion, codegen_dir):
    """Raise a MismatchCAPIError if the C API version needs updating."""
    curapi_hash, api_hash = get_api_versions(apiversion, codegen_dir)

    # If the hashes differ, it means that the api .txt files in
    # codegen_dir have been updated without the API version being
    # updated. Any modification in those .txt files should be reflected
    # in the api and eventually abi versions.
    # To compute the checksum of the current API, use numpy/core/cversions.py
    if curapi_hash != api_hash:
        msg = ("API mismatch detected, the C API version "
               "numbers have to be updated. Current C API version is "
               f"{apiversion}, with checksum {curapi_hash}, but recorded "
               f"checksum in core/codegen_dir/cversions.txt is {api_hash}. If "
               "functions were added in the C API, you have to update "
               f"C_API_VERSION in {__file__}."
               )
        raise MismatchCAPIError(msg)
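
# Illustrative note (not part of the build logic in this file): the core build
# is expected to call check_api_version(C_API_VERSION, codegen_dir) with
# codegen_dir pointing at the code_generators directory, so a stale
# C_API_VERSION is caught before any generated headers are written.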

FUNC_CALL_ARGS = {}


def set_sig(sig):
    prefix, _, args = sig.partition("(")
    args = args.rpartition(")")[0]
    funcname = prefix.rpartition(" ")[-1]
    args = [arg.strip() for arg in args.split(",")]
    # We use {0} because 0 alone cannot be cast to complex on MSVC in C:
    FUNC_CALL_ARGS[funcname] = ", ".join("(%s){0}" % arg for arg in args)
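
# For example, a declaration line such as "double hypot(double, double)" (a
# hypothetical entry in the feature_detection_*.h files read below) results in
# FUNC_CALL_ARGS["hypot"] == "(double){0}, (double){0}".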
for file in [
    "feature_detection_locale.h",
    "feature_detection_math.h",
    "feature_detection_cmath.h",
    "feature_detection_misc.h",
    "feature_detection_stdio.h",
]:
    with open(pathlib.Path(__file__).parent / file) as f:
        for line in f:
            if line.startswith("#"):
                continue
            if not line.strip():
                continue
            set_sig(line)

# Mandatory functions: if not found, fail the build.
# Some of these can still be blocklisted if the C99 implementation
# is buggy, see numpy/core/src/common/npy_config.h
MANDATORY_FUNCS = [
    "sin", "cos", "tan", "sinh", "cosh", "tanh", "fabs",
    "floor", "ceil", "sqrt", "log10", "log", "exp", "asin",
    "acos", "atan", "fmod", 'modf', 'frexp', 'ldexp',
    "expm1", "log1p", "acosh", "asinh", "atanh",
    "rint", "trunc", "exp2",
    "copysign", "nextafter", "strtoll", "strtoull", "cbrt",
    "log2", "pow", "hypot", "atan2",
    "creal", "cimag", "conj"
]

OPTIONAL_LOCALE_FUNCS = ["strtold_l"]
OPTIONAL_FILE_FUNCS = ["ftello", "fseeko", "fallocate"]
OPTIONAL_MISC_FUNCS = ["backtrace", "madvise"]

# Variable attributes tested via "int %s a" % attribute
OPTIONAL_VARIABLE_ATTRIBUTES = ["__thread", "__declspec(thread)"]

# Subset of OPTIONAL_*_FUNCS which may already have HAVE_* defined by Python.h
OPTIONAL_FUNCS_MAYBE = [
    "ftello", "fseeko"
]

C99_COMPLEX_TYPES = [
    'complex double', 'complex float', 'complex long double'
]
C99_COMPLEX_FUNCS = [
    "cabs", "cacos", "cacosh", "carg", "casin", "casinh", "catan",
    "catanh", "cexp", "clog", "cpow", "csqrt",
    # The long double variants (like csinl) should be mandatory on C11,
    # but are missing in FreeBSD. Issue gh-22850
    "csin", "csinh", "ccos", "ccosh", "ctan", "ctanh",
]

OPTIONAL_HEADERS = [
    # sse headers only enabled automatically on amd64/x32 builds
    "xmmintrin.h",   # SSE
    "emmintrin.h",   # SSE2
    "immintrin.h",   # AVX
    "features.h",    # for glibc version on linux
    "xlocale.h",     # see GH#8367
    "dlfcn.h",       # dladdr
    "execinfo.h",    # backtrace
    "libunwind.h",   # backtrace for LLVM/Clang using libunwind
    "sys/mman.h",    # madvise
]

# Optional gcc compiler builtins and their call arguments, and optionally a
# required header and definition name (HAVE_ prepended).
# Call arguments are required, as the compiler will do strict signature checking.
OPTIONAL_INTRINSICS = [("__builtin_isnan", '5.'),
                       ("__builtin_isinf", '5.'),
                       ("__builtin_isfinite", '5.'),
                       ("__builtin_bswap32", '5u'),
                       ("__builtin_bswap64", '5u'),
                       ("__builtin_expect", '5, 0'),
                       # Test `long long` for arm+clang 13 (gh-22811,
                       # but we use all versions of __builtin_mul_overflow):
                       ("__builtin_mul_overflow", '(long long)5, 5, (int*)5'),
                       # MMX only needed for icc, but some clangs don't have it
                       ("_m_from_int64", '0', "emmintrin.h"),
                       ("_mm_load_ps", '(float*)0', "xmmintrin.h"),  # SSE
                       ("_mm_prefetch", '(float*)0, _MM_HINT_NTA',
                        "xmmintrin.h"),  # SSE
                       ("_mm_load_pd", '(double*)0', "emmintrin.h"),  # SSE2
                       ("__builtin_prefetch", "(float*)0, 0, 3"),
                       # Check that the linker can handle AVX
                       ("__asm__ volatile", '"vpand %xmm1, %xmm2, %xmm3"',
                        "stdio.h", "LINK_AVX"),
                       ("__asm__ volatile", '"vpand %ymm1, %ymm2, %ymm3"',
                        "stdio.h", "LINK_AVX2"),
                       ("__asm__ volatile", '"vpaddd %zmm1, %zmm2, %zmm3"',
                        "stdio.h", "LINK_AVX512F"),
                       ("__asm__ volatile", '"vfpclasspd $0x40, %zmm15, %k6\\n"\
                                             "vmovdqu8 %xmm0, %xmm1\\n"\
                                             "vpbroadcastmb2q %k0, %xmm0\\n"',
                        "stdio.h", "LINK_AVX512_SKX"),
                       ("__asm__ volatile", '"xgetbv"', "stdio.h", "XGETBV"),
                       ]
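
# Illustrative note: an entry like ("_mm_load_ps", '(float*)0', "xmmintrin.h")
# is compiled roughly as `#include <xmmintrin.h>` followed by the call
# `_mm_load_ps((float*)0);`; if it builds, a HAVE_ macro derived from the name
# (or from the explicit definition name, for four-element entries) is expected
# to be defined in the generated config header.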

# Function attributes
# tested via "int %s %s(void *);" % (attribute, name)
# Function name will be converted to a HAVE_<upper-case-name> preprocessor macro
OPTIONAL_FUNCTION_ATTRIBUTES = [('__attribute__((optimize("unroll-loops")))',
                                 'attribute_optimize_unroll_loops'),
                                ('__attribute__((optimize("O3")))',
                                 'attribute_optimize_opt_3'),
                                ('__attribute__((optimize("O2")))',
                                 'attribute_optimize_opt_2'),
                                ('__attribute__((nonnull (1)))',
                                 'attribute_nonnull'),
                                ]

OPTIONAL_FUNCTION_ATTRIBUTES_AVX = [('__attribute__((target ("avx")))',
                                     'attribute_target_avx'),
                                    ('__attribute__((target ("avx2")))',
                                     'attribute_target_avx2'),
                                    ('__attribute__((target ("avx512f")))',
                                     'attribute_target_avx512f'),
                                    ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
                                     'attribute_target_avx512_skx'),
                                    ]

# Function attributes with intrinsics.
# These ensure the compiler can compile AVX intrinsics given only the
# attributes: gcc 4.8.4 supports the attributes but not the intrinsics.
# Tested via "#include<%s> int %s %s(void *){code; return 0;};"
# % (header, attribute, name), with `code` substituted into the body.
# Function name will be converted to a HAVE_<upper-case-name> preprocessor macro.
# The _mm512_castps_si512 intrinsic is a specific check for AVX-512F support
# in gcc-4.9, which is missing a subset of intrinsics. See
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61878
OPTIONAL_FUNCTION_ATTRIBUTES_WITH_INTRINSICS_AVX = [
    ('__attribute__((target("avx2,fma")))',
     'attribute_target_avx2_with_intrinsics',
     '__m256 temp = _mm256_set1_ps(1.0); temp = \
      _mm256_fmadd_ps(temp, temp, temp)',
     'immintrin.h'),
    ('__attribute__((target("avx512f")))',
     'attribute_target_avx512f_with_intrinsics',
     '__m512i temp = _mm512_castps_si512(_mm512_set1_ps(1.0))',
     'immintrin.h'),
    ('__attribute__((target ("avx512f,avx512dq,avx512bw,avx512vl,avx512cd")))',
     'attribute_target_avx512_skx_with_intrinsics',
     '__mmask8 temp = _mm512_fpclass_pd_mask(_mm512_set1_pd(1.0), 0x01);\
      __m512i unused_temp = \
      _mm512_castps_si512(_mm512_set1_ps(1.0));\
      _mm_mask_storeu_epi8(NULL, 0xFF, _mm_broadcastmb_epi64(temp))',
     'immintrin.h'),
]


def fname2def(name):
    return "HAVE_%s" % name.upper()


def sym2def(symbol):
    define = symbol.replace(' ', '')
    return define.upper()


def type2def(symbol):
    define = symbol.replace(' ', '_')
    return define.upper()
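
# For example: fname2def("expm1") -> "HAVE_EXPM1",
# sym2def("complex double") -> "COMPLEXDOUBLE",
# type2def("long double") -> "LONG_DOUBLE".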


# Code to detect long double representation taken from MPFR m4 macro
def check_long_double_representation(cmd):
    cmd._check_compiler()
    body = LONG_DOUBLE_REPRESENTATION_SRC % {'type': 'long double'}

    # Disable whole program optimization (the default on vs2015, with python 3.5+)
    # which generates intermediary object files and prevents checking the
    # float representation.
    if sys.platform == "win32" and not mingw32():
        try:
            cmd.compiler.compile_options.remove("/GL")
        except (AttributeError, ValueError):
            pass
    # Disable multi-file interprocedural optimization in the Intel compiler on Linux
    # which generates intermediary object files and prevents checking the
    # float representation.
    elif (sys.platform != "win32"
            and cmd.compiler.compiler_type.startswith('intel')
            and '-ipo' in cmd.compiler.cc_exe):
        newcompiler = cmd.compiler.cc_exe.replace(' -ipo', '')
        cmd.compiler.set_executables(
            compiler=newcompiler,
            compiler_so=newcompiler,
            compiler_cxx=newcompiler,
            linker_exe=newcompiler,
            linker_so=newcompiler + ' -shared'
        )

    # We need to use _compile because we need the object filename
    src, obj = cmd._compile(body, None, None, 'c')
    try:
        ltype = long_double_representation(pyod(obj))
        return ltype
    except ValueError:
        # try linking to support CC="gcc -flto" or icc -ipo
        # struct needs to be volatile so it isn't optimized away;
        # additionally "clang -flto" requires the foo struct to be used
        body = body.replace('struct', 'volatile struct')
        body += "int main(void) { return foo.before[0]; }\n"
        src, obj = cmd._compile(body, None, None, 'c')
        cmd.temp_files.append("_configtest")
        cmd.compiler.link_executable([obj], "_configtest")
        ltype = long_double_representation(pyod("_configtest"))
        return ltype
    finally:
        cmd._clean()


LONG_DOUBLE_REPRESENTATION_SRC = r"""
/* "before" is 16 bytes to ensure there's no padding between it and "x".
 * We're not expecting any "long double" bigger than 16 bytes or with
 * alignment requirements stricter than 16 bytes. */
typedef %(type)s test_type;

struct {
    char         before[16];
    test_type    x;
    char         after[8];
} foo = {
    { '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
      '\001', '\043', '\105', '\147', '\211', '\253', '\315', '\357' },
    -123456789.0,
    { '\376', '\334', '\272', '\230', '\166', '\124', '\062', '\020' }
};
"""


def pyod(filename):
    """Python implementation of the od UNIX utility (od -b, more exactly).

    Parameters
    ----------
    filename : str
        name of the file to get the dump from.

    Returns
    -------
    out : seq
        list of lines of od output

    Notes
    -----
    We only implement enough to get the necessary information for long double
    representation; this is not intended as a compatible replacement for od.
    """
    out = []
    with open(filename, 'rb') as fid:
        yo2 = [oct(o)[2:] for o in fid.read()]
    for i in range(0, len(yo2), 16):
        line = ['%07d' % int(oct(i)[2:])]
        line.extend(['%03d' % int(c) for c in yo2[i:i+16]])
        out.append(" ".join(line))
    return out


_BEFORE_SEQ = ['000', '000', '000', '000', '000', '000', '000', '000',
               '001', '043', '105', '147', '211', '253', '315', '357']
_AFTER_SEQ = ['376', '334', '272', '230', '166', '124', '062', '020']

_IEEE_DOUBLE_BE = ['301', '235', '157', '064', '124', '000', '000', '000']
_IEEE_DOUBLE_LE = _IEEE_DOUBLE_BE[::-1]
_INTEL_EXTENDED_12B = ['000', '000', '000', '000', '240', '242', '171', '353',
                       '031', '300', '000', '000']
_INTEL_EXTENDED_16B = ['000', '000', '000', '000', '240', '242', '171', '353',
                       '031', '300', '000', '000', '000', '000', '000', '000']
_MOTOROLA_EXTENDED_12B = ['300', '031', '000', '000', '353', '171',
                          '242', '240', '000', '000', '000', '000']
_IEEE_QUAD_PREC_BE = ['300', '031', '326', '363', '105', '100', '000', '000',
                      '000', '000', '000', '000', '000', '000', '000', '000']
_IEEE_QUAD_PREC_LE = _IEEE_QUAD_PREC_BE[::-1]
_IBM_DOUBLE_DOUBLE_BE = (['301', '235', '157', '064', '124', '000', '000', '000'] +
                         ['000'] * 8)
_IBM_DOUBLE_DOUBLE_LE = (['000', '000', '000', '124', '064', '157', '235', '301'] +
                         ['000'] * 8)


def long_double_representation(lines):
    """Given a binary dump as given by GNU od -b, look for the long double
    representation."""

    # read contains a list of 32 items, each item is a byte (in octal
    # representation, as a string). We 'slide' over the output until read is
    # of the form before_seq + content + after_sequence, where content is the
    # long double representation:
    #  - content is 12 bytes: 80-bit Intel representation
    #  - content is 16 bytes: 80-bit Intel representation (padded to 16 bytes)
    #    or quad precision
    #  - content is 8 bytes: same as double (not implemented yet)
    read = [''] * 32
    saw = None
    for line in lines:
        # we skip the first word, as od -b outputs an index at the beginning
        # of each line
        for w in line.split()[1:]:
            read.pop(0)
            read.append(w)

            # If the end of read is equal to the after_sequence, read contains
            # the long double
            if read[-8:] == _AFTER_SEQ:
                saw = copy.copy(read)
                # if the content was 12 bytes, we only have 32 - 8 - 12 = 12
                # "before" bytes. In other words the first 4 "before" bytes
                # went past the sliding window.
                if read[:12] == _BEFORE_SEQ[4:]:
                    if read[12:-8] == _INTEL_EXTENDED_12B:
                        return 'INTEL_EXTENDED_12_BYTES_LE'
                    if read[12:-8] == _MOTOROLA_EXTENDED_12B:
                        return 'MOTOROLA_EXTENDED_12_BYTES_BE'
                # if the content was 16 bytes, we are left with 32 - 8 - 16 = 8
                # "before" bytes, so 8 went past the sliding window.
                elif read[:8] == _BEFORE_SEQ[8:]:
                    if read[8:-8] == _INTEL_EXTENDED_16B:
                        return 'INTEL_EXTENDED_16_BYTES_LE'
                    elif read[8:-8] == _IEEE_QUAD_PREC_BE:
                        return 'IEEE_QUAD_BE'
                    elif read[8:-8] == _IEEE_QUAD_PREC_LE:
                        return 'IEEE_QUAD_LE'
                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_LE:
                        return 'IBM_DOUBLE_DOUBLE_LE'
                    elif read[8:-8] == _IBM_DOUBLE_DOUBLE_BE:
                        return 'IBM_DOUBLE_DOUBLE_BE'
                # if the content was 8 bytes, we are left with 32 - 8 - 8 = 16
                # "before" bytes
                elif read[:16] == _BEFORE_SEQ:
                    if read[16:-8] == _IEEE_DOUBLE_LE:
                        return 'IEEE_DOUBLE_LE'
                    elif read[16:-8] == _IEEE_DOUBLE_BE:
                        return 'IEEE_DOUBLE_BE'

    if saw is not None:
        raise ValueError("Unrecognized format (%s)" % saw)
    else:
        # We never detected the after_sequence
        raise ValueError("Could not lock sequences (%s)" % saw)
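
# Illustrative note: on a typical x86-64 Linux build, gcc stores long double
# as the 80-bit Intel extended format padded to 16 bytes, so the detection
# above is expected to return 'INTEL_EXTENDED_16_BYTES_LE'.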


def check_for_right_shift_internal_compiler_error(cmd):
    """
    On our arm CI, this fails with an internal compilation error.

    The failure looks like the following, and can be reproduced on ARM64 GCC 5.4:

        <source>: In function 'right_shift':
        <source>:4:20: internal compiler error: in expand_shift_1, at expmed.c:2349
                ip1[i] = ip1[i] >> in2;
                       ^
        Please submit a full bug report,
        with preprocessed source if appropriate.
        See <http://gcc.gnu.org/bugs.html> for instructions.
        Compiler returned: 1

    This function returns True if this compiler bug is present, in which case
    optimization must be turned off for the affected function.
    """
    cmd._check_compiler()
    has_optimize = cmd.try_compile(textwrap.dedent("""\
        __attribute__((optimize("O3"))) void right_shift() {}
        """), None, None)
    if not has_optimize:
        return False

    no_err = cmd.try_compile(textwrap.dedent("""\
        typedef long the_type;  /* fails also for unsigned and long long */
        __attribute__((optimize("O3"))) void right_shift(the_type in2, the_type *ip1, int n) {
            for (int i = 0; i < n; i++) {
                if (in2 < (the_type)sizeof(the_type) * 8) {
                    ip1[i] = ip1[i] >> in2;
                }
            }
        }
        """), None, None)

    return not no_err