// thread_constants.h (611 B)
  #pragma once
  #include <cstdint>

  #include <c10/macros/Macros.h>
  // Marks a lambda as executable on both the host and device. The __host__
  // attribute is important so that we can access static type information from
  // the host, even if the function is typically only executed on the device.
  //
  // The #ifndef guard leaves any GPU_LAMBDA definition that is already in
  // effect untouched; the default below is only used when none exists.
  #ifndef GPU_LAMBDA
  #define GPU_LAMBDA __host__ __device__
  #endif
  9. #if defined(USE_ROCM)
  10. constexpr int num_threads() {
  11. return 256;
  12. }
  13. #else
  14. constexpr uint32_t num_threads() {
  15. return C10_WARP_SIZE * 4;
  16. }
  17. #endif
  // Compile-time constant 4; multiplied by num_threads() in block_work_size().
  // Presumably the number of work items each thread handles -- confirm with
  // the kernels that use it.
  constexpr int thread_work_size() {
    return 4;
  }
  19. constexpr int block_work_size() { return thread_work_size() * num_threads(); }