context_impl.cc 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: vitus@google.com (Michael Vitus)
  30. #include "ceres/context_impl.h"
  31. #include <string>
  32. #include "ceres/internal/config.h"
  33. #include "ceres/stringprintf.h"
  34. #include "ceres/wall_time.h"
  35. #ifndef CERES_NO_CUDA
  36. #include "cublas_v2.h"
  37. #include "cuda_runtime.h"
  38. #include "cusolverDn.h"
  39. #endif // CERES_NO_CUDA
  40. namespace ceres::internal {
  41. ContextImpl::ContextImpl() = default;
  42. #ifndef CERES_NO_CUDA
  43. void ContextImpl::TearDown() {
  44. if (cusolver_handle_ != nullptr) {
  45. cusolverDnDestroy(cusolver_handle_);
  46. cusolver_handle_ = nullptr;
  47. }
  48. if (cublas_handle_ != nullptr) {
  49. cublasDestroy(cublas_handle_);
  50. cublas_handle_ = nullptr;
  51. }
  52. if (cusparse_handle_ != nullptr) {
  53. cusparseDestroy(cusparse_handle_);
  54. cusparse_handle_ = nullptr;
  55. }
  56. for (auto& s : streams_) {
  57. if (s != nullptr) {
  58. cudaStreamDestroy(s);
  59. s = nullptr;
  60. }
  61. }
  62. is_cuda_initialized_ = false;
  63. }
  64. std::string ContextImpl::CudaConfigAsString() const {
  65. return ceres::internal::StringPrintf(
  66. "======================= CUDA Device Properties ======================\n"
  67. "Cuda version : %d.%d\n"
  68. "Device ID : %d\n"
  69. "Device name : %s\n"
  70. "Total GPU memory : %6.f MiB\n"
  71. "GPU memory available : %6.f MiB\n"
  72. "Compute capability : %d.%d\n"
  73. "Warp size : %d\n"
  74. "Max threads per block : %d\n"
  75. "Max threads per dim : %d %d %d\n"
  76. "Max grid size : %d %d %d\n"
  77. "Multiprocessor count : %d\n"
  78. "cudaMallocAsync supported : %s\n"
  79. "====================================================================",
  80. cuda_version_major_,
  81. cuda_version_minor_,
  82. gpu_device_id_in_use_,
  83. gpu_device_properties_.name,
  84. gpu_device_properties_.totalGlobalMem / 1024.0 / 1024.0,
  85. GpuMemoryAvailable() / 1024.0 / 1024.0,
  86. gpu_device_properties_.major,
  87. gpu_device_properties_.minor,
  88. gpu_device_properties_.warpSize,
  89. gpu_device_properties_.maxThreadsPerBlock,
  90. gpu_device_properties_.maxThreadsDim[0],
  91. gpu_device_properties_.maxThreadsDim[1],
  92. gpu_device_properties_.maxThreadsDim[2],
  93. gpu_device_properties_.maxGridSize[0],
  94. gpu_device_properties_.maxGridSize[1],
  95. gpu_device_properties_.maxGridSize[2],
  96. gpu_device_properties_.multiProcessorCount,
  97. // In CUDA 12.0.0+ cudaDeviceProp has field memoryPoolsSupported, but it
  98. // is not available in older versions
  99. is_cuda_memory_pools_supported_ ? "Yes" : "No");
  100. }
  101. size_t ContextImpl::GpuMemoryAvailable() const {
  102. size_t free, total;
  103. cudaMemGetInfo(&free, &total);
  104. return free;
  105. }
  106. bool ContextImpl::InitCuda(std::string* message) {
  107. if (is_cuda_initialized_) {
  108. return true;
  109. }
  110. CHECK_EQ(cudaGetDevice(&gpu_device_id_in_use_), cudaSuccess);
  111. int cuda_version;
  112. CHECK_EQ(cudaRuntimeGetVersion(&cuda_version), cudaSuccess);
  113. cuda_version_major_ = cuda_version / 1000;
  114. cuda_version_minor_ = (cuda_version % 1000) / 10;
  115. CHECK_EQ(
  116. cudaGetDeviceProperties(&gpu_device_properties_, gpu_device_id_in_use_),
  117. cudaSuccess);
  118. #if CUDART_VERSION >= 11020
  119. int is_cuda_memory_pools_supported;
  120. CHECK_EQ(cudaDeviceGetAttribute(&is_cuda_memory_pools_supported,
  121. cudaDevAttrMemoryPoolsSupported,
  122. gpu_device_id_in_use_),
  123. cudaSuccess);
  124. is_cuda_memory_pools_supported_ = is_cuda_memory_pools_supported == 1;
  125. #endif
  126. VLOG(3) << "\n" << CudaConfigAsString();
  127. EventLogger event_logger("InitCuda");
  128. if (cublasCreate(&cublas_handle_) != CUBLAS_STATUS_SUCCESS) {
  129. *message =
  130. "CUDA initialization failed because cuBLAS::cublasCreate failed.";
  131. cublas_handle_ = nullptr;
  132. return false;
  133. }
  134. event_logger.AddEvent("cublasCreate");
  135. if (cusolverDnCreate(&cusolver_handle_) != CUSOLVER_STATUS_SUCCESS) {
  136. *message =
  137. "CUDA initialization failed because cuSolverDN::cusolverDnCreate "
  138. "failed.";
  139. TearDown();
  140. return false;
  141. }
  142. event_logger.AddEvent("cusolverDnCreate");
  143. if (cusparseCreate(&cusparse_handle_) != CUSPARSE_STATUS_SUCCESS) {
  144. *message =
  145. "CUDA initialization failed because cuSPARSE::cusparseCreate failed.";
  146. TearDown();
  147. return false;
  148. }
  149. event_logger.AddEvent("cusparseCreate");
  150. for (auto& s : streams_) {
  151. if (cudaStreamCreateWithFlags(&s, cudaStreamNonBlocking) != cudaSuccess) {
  152. *message =
  153. "CUDA initialization failed because CUDA::cudaStreamCreateWithFlags "
  154. "failed.";
  155. TearDown();
  156. return false;
  157. }
  158. }
  159. event_logger.AddEvent("cudaStreamCreateWithFlags");
  160. if (cusolverDnSetStream(cusolver_handle_, DefaultStream()) !=
  161. CUSOLVER_STATUS_SUCCESS ||
  162. cublasSetStream(cublas_handle_, DefaultStream()) !=
  163. CUBLAS_STATUS_SUCCESS ||
  164. cusparseSetStream(cusparse_handle_, DefaultStream()) !=
  165. CUSPARSE_STATUS_SUCCESS) {
  166. *message = "CUDA initialization failed because SetStream failed.";
  167. TearDown();
  168. return false;
  169. }
  170. event_logger.AddEvent("SetStream");
  171. is_cuda_initialized_ = true;
  172. return true;
  173. }
  174. #endif // CERES_NO_CUDA
  175. ContextImpl::~ContextImpl() {
  176. #ifndef CERES_NO_CUDA
  177. TearDown();
  178. #endif // CERES_NO_CUDA
  179. }
  180. void ContextImpl::EnsureMinimumThreads(int num_threads) {
  181. thread_pool.Resize(num_threads);
  182. }
  183. } // namespace ceres::internal