/* intrinsics.h */
  1. #pragma once
  2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
  3. /* GCC or clang-compatible compiler, targeting x86/x86-64 */
  4. #include <x86intrin.h>
  5. #elif defined(__clang__) && (defined(__ARM_NEON__) || defined(__aarch64__))
  6. /* Clang-compatible compiler, targeting arm neon */
  7. #include <arm_neon.h>
  8. #elif defined(_MSC_VER)
  9. /* Microsoft C/C++-compatible compiler */
  10. #include <intrin.h>
  11. #if _MSC_VER <= 1900
  12. #define _mm256_extract_epi64(X, Y) (_mm_extract_epi64(_mm256_extractf128_si256(X, Y >> 1), Y % 2))
  13. #define _mm256_extract_epi32(X, Y) (_mm_extract_epi32(_mm256_extractf128_si256(X, Y >> 2), Y % 4))
  14. #define _mm256_extract_epi16(X, Y) (_mm_extract_epi16(_mm256_extractf128_si256(X, Y >> 3), Y % 8))
  15. #define _mm256_extract_epi8(X, Y) (_mm_extract_epi8(_mm256_extractf128_si256(X, Y >> 4), Y % 16))
  16. #endif
  17. #elif defined(__GNUC__) && (defined(__ARM_NEON__) || defined(__aarch64__))
  18. /* GCC-compatible compiler, targeting ARM with NEON */
  19. #include <arm_neon.h>
  20. #if defined (MISSING_ARM_VLD1)
  21. #include <ATen/cpu/vec/vec256/missing_vld1_neon.h>
  22. #elif defined (MISSING_ARM_VST1)
  23. #include <ATen/cpu/vec/vec256/missing_vst1_neon.h>
  24. #endif
  25. #elif defined(__GNUC__) && defined(__IWMMXT__)
  26. /* GCC-compatible compiler, targeting ARM with WMMX */
  27. #include <mmintrin.h>
  28. #elif defined(__s390x__)
  29. // targets Z/architecture
  30. // we will include vecintrin later
  31. #elif (defined(__GNUC__) || defined(__xlC__)) && \
  32. (defined(__VEC__) || defined(__ALTIVEC__))
  33. /* XLC or GCC-compatible compiler, targeting PowerPC with VMX/VSX */
  34. #include <altivec.h>
  35. /* We need to undef those tokens defined by <altivec.h> to avoid conflicts
  36. with the C++ types. => Can still use __bool/__vector */
  37. #undef bool
  38. #undef vector
  39. #undef pixel
  40. #elif defined(__GNUC__) && defined(__SPE__)
  41. /* GCC-compatible compiler, targeting PowerPC with SPE */
  42. #include <spe.h>
  43. #endif