#pragma once

#include <c10/cuda/CUDADeviceAssertionHost.h>
#include <c10/cuda/CUDAMacros.h>
#include <c10/cuda/CUDAMiscFunctions.h>
#include <c10/macros/Macros.h>
#include <c10/util/Exception.h>
#include <c10/util/irange.h>

#include <cuda.h>

// Note [CHECK macro]
// ~~~~~~~~~~~~~~~~~~
// This is a macro so that AT_ERROR can get accurate __LINE__
// and __FILE__ information. We could split this into a short
// macro and a function implementation if we pass along __LINE__
// and __FILE__, but no one has found this worth doing.

// Used to denote errors from the CUDA framework.
// This needs to be declared here, instead of in c10/util/Exception.h,
// so that it is converted properly during hipify.
namespace c10 {
class C10_CUDA_API CUDAError : public c10::Error {
  using Error::Error;
};
} // namespace c10

#define C10_CUDA_CHECK(EXPR)                                        \
  do {                                                              \
    const cudaError_t __err = EXPR;                                 \
    c10::cuda::c10_cuda_check_implementation(                       \
        static_cast<int32_t>(__err),                                \
        __FILE__,                                                   \
        __func__, /* Line number data type not well-defined between \
                     compilers, so we perform an explicit cast */   \
        static_cast<uint32_t>(__LINE__),                            \
        true);                                                      \
  } while (0)
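// Illustrative usage sketch (not part of the original header): wrap any CUDA
// runtime call that returns a cudaError_t so a failure raises a c10::Error
// carrying file/function/line context, e.g.
//
//   void* ptr = nullptr;
//   C10_CUDA_CHECK(cudaMalloc(&ptr, 1024));
//   C10_CUDA_CHECK(cudaDeviceSynchronize());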
#define C10_CUDA_CHECK_WARN(EXPR)                              \
  do {                                                         \
    const cudaError_t __err = EXPR;                            \
    if (C10_UNLIKELY(__err != cudaSuccess)) {                  \
      auto error_unused C10_UNUSED = cudaGetLastError();       \
      (void)error_unused;                                      \
      TORCH_WARN("CUDA warning: ", cudaGetErrorString(__err)); \
    }                                                          \
  } while (0)
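// Illustrative usage sketch (not part of the original header): intended for
// code that must not throw, such as destructors, where a failure should only
// be reported; `DeviceBuffer` and `ptr_` are hypothetical, e.g.
//
//   ~DeviceBuffer() {
//     C10_CUDA_CHECK_WARN(cudaFree(ptr_));
//   }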
// Indicates that a CUDA error is handled in a non-standard way
#define C10_CUDA_ERROR_HANDLED(EXPR) EXPR

// Intentionally ignore a CUDA error
#define C10_CUDA_IGNORE_ERROR(EXPR)                             \
  do {                                                          \
    const cudaError_t __err = EXPR;                             \
    if (C10_UNLIKELY(__err != cudaSuccess)) {                   \
      cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
      (void)error_unused;                                       \
    }                                                           \
  } while (0)
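// Illustrative usage sketch (not part of the original header): use this when a
// failure is expected and benign, e.g. destroying an event late in process
// shutdown; `event_` is a hypothetical member:
//
//   C10_CUDA_IGNORE_ERROR(cudaEventDestroy(event_));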
// Clear the last CUDA error
#define C10_CUDA_CLEAR_ERROR()                                \
  do {                                                        \
    cudaError_t error_unused C10_UNUSED = cudaGetLastError(); \
    (void)error_unused;                                       \
  } while (0)

// This should be used directly after every kernel launch to ensure
// the launch happened correctly and provide an early, close-to-source
// diagnostic if it didn't.
#define C10_CUDA_KERNEL_LAUNCH_CHECK() C10_CUDA_CHECK(cudaGetLastError())
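// Illustrative usage sketch (not part of the original header): the check goes
// immediately after the launch so an invalid configuration is reported at the
// call site rather than at the next synchronizing call. `my_kernel`, `grid`,
// `block`, `stream`, and the kernel arguments are hypothetical:
//
//   my_kernel<<<grid, block, 0, stream>>>(input, output, n);
//   C10_CUDA_KERNEL_LAUNCH_CHECK();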
/// Launches a CUDA kernel, appending to its arguments all the information
/// needed to handle device-side assertion failures. Checks that the launch
/// was successful.
#define TORCH_DSA_KERNEL_LAUNCH(                                      \
    kernel, blocks, threads, shared_mem, stream, ...)                 \
  do {                                                                \
    auto& launch_registry =                                           \
        c10::cuda::CUDAKernelLaunchRegistry::get_singleton_ref();     \
    kernel<<<blocks, threads, shared_mem, stream>>>(                  \
        __VA_ARGS__,                                                  \
        launch_registry.get_uvm_assertions_ptr_for_current_device(),  \
        launch_registry.insert(                                       \
            __FILE__, __FUNCTION__, __LINE__, #kernel, stream.id())); \
    C10_CUDA_KERNEL_LAUNCH_CHECK();                                   \
  } while (0)
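// Illustrative usage sketch (not part of the original header): the macro
// appends two extra trailing arguments to the kernel call (the device-side
// assertion buffer and a launch id), so the kernel's parameter list must end
// with matching parameters. `my_dsa_kernel`, `grid`, `block`, `stream`, and
// `out` are hypothetical:
//
//   TORCH_DSA_KERNEL_LAUNCH(my_dsa_kernel, grid, block, 0, stream, out);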
namespace c10 {
namespace cuda {

/// In the event of a CUDA failure, formats a nice error message about that
/// failure and also checks for device-side assertion failures
C10_CUDA_API void c10_cuda_check_implementation(
    const int32_t err,
    const char* filename,
    const char* function_name,
    const int line_number,
    const bool include_device_assertions);

} // namespace cuda
} // namespace c10