extra_avx512f_reduce.c 1.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041
  1. #include <immintrin.h>
  /**
   * The following intrinsics don't have direct native support, but compilers
   * tend to emulate them.
   * They're usually supported by gcc >= 7.1, clang >= 4 and icc >= 19.
   */
  /*
   * Exercises the AVX-512F horizontal-reduction intrinsics.
   *
   * Each _mm512_reduce_{add,mul,min,max}_* and _mm512_reduce_{and,or}_*
   * intrinsic listed below is called once on a vector whose lanes were
   * filled by the matching _mm512_set1_* call, and every result is folded
   * into the value returned from main.
   */
  int main(void)
  {
      __m512  ones_f32 = _mm512_set1_ps(1.0f);
      __m512d ones_f64 = _mm512_set1_pd(1.0);
      /* This one vector is passed to the epi64 as well as the epi32/epu32 calls. */
      __m512i ones_i64 = _mm512_set1_epi64(1);

      /* single precision: add, mul, min, max */
      float acc_f32 = _mm512_reduce_add_ps(ones_f32);
      acc_f32 += _mm512_reduce_mul_ps(ones_f32);
      acc_f32 += _mm512_reduce_min_ps(ones_f32);
      acc_f32 += _mm512_reduce_max_ps(ones_f32);

      /* double precision: add, mul, min, max */
      double acc_f64 = _mm512_reduce_add_pd(ones_f64);
      acc_f64 += _mm512_reduce_mul_pd(ones_f64);
      acc_f64 += _mm512_reduce_min_pd(ones_f64);
      acc_f64 += _mm512_reduce_max_pd(ones_f64);

      /* 64-bit integer lanes: add, mul, min, max, and, or */
      int acc_i64 = (int)_mm512_reduce_add_epi64(ones_i64);
      acc_i64 += (int)_mm512_reduce_mul_epi64(ones_i64);
      acc_i64 += (int)_mm512_reduce_min_epi64(ones_i64);
      acc_i64 += (int)_mm512_reduce_max_epi64(ones_i64);
      acc_i64 += (int)_mm512_reduce_and_epi64(ones_i64);
      acc_i64 += (int)_mm512_reduce_or_epi64(ones_i64);

      /* 32-bit integer lanes (signed and unsigned): add, mul, min, max, and, or */
      int acc_i32 = (int)_mm512_reduce_add_epi32(ones_i64);
      acc_i32 += (int)_mm512_reduce_mul_epi32(ones_i64);
      acc_i32 += (int)_mm512_reduce_min_epi32(ones_i64);
      acc_i32 += (int)_mm512_reduce_min_epu32(ones_i64);
      acc_i32 += (int)_mm512_reduce_max_epi32(ones_i64);
      acc_i32 += (int)_mm512_reduce_max_epu32(ones_i64);
      acc_i32 += (int)_mm512_reduce_and_epi32(ones_i64);
      acc_i32 += (int)_mm512_reduce_or_epi32(ones_i64);

      /* The float totals are truncated to int before being combined. */
      return (int)acc_f32 + (int)acc_f64 + acc_i64 + acc_i32;
  }