// cuda_helpers.h
  1. #pragma once
  2. namespace vision {
  3. namespace ops {
  4. #define CUDA_1D_KERNEL_LOOP_T(i, n, index_t) \
  5. for (index_t i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \
  6. i += (blockDim.x * gridDim.x))
  7. #define CUDA_1D_KERNEL_LOOP(i, n) CUDA_1D_KERNEL_LOOP_T(i, n, int)
  8. template <typename integer>
  9. constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) {
  10. return (n + m - 1) / m;
  11. }
  12. } // namespace ops
  13. } // namespace vision