| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 | // Ceres Solver - A fast non-linear least squares minimizer// Copyright 2023 Google Inc. All rights reserved.// http://ceres-solver.org///// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are met://// * Redistributions of source code must retain the above copyright notice,//   this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above copyright notice,//   this list of conditions and the following disclaimer in the documentation//   and/or other materials provided with the distribution.// * Neither the name of Google Inc. nor the names of its contributors may be//   used to endorse or promote products derived from this software without//   specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE// POSSIBILITY OF SUCH DAMAGE.//// Author: joydeepb@cs.utexas.edu (Joydeep Biswas)#ifndef CERES_INTERNAL_CUDA_BUFFER_H_#define CERES_INTERNAL_CUDA_BUFFER_H_#include "ceres/context_impl.h"#include "ceres/internal/config.h"#ifndef CERES_NO_CUDA#include <vector>#include "cuda_runtime.h"#include "glog/logging.h"namespace ceres::internal {// An encapsulated buffer to maintain GPU memory, and handle transfers between// GPU and system memory. It is the responsibility of the user to ensure that// the appropriate GPU device is selected before each subroutine is called. 
This
// is particularly important when using multiple GPU devices on different CPU
// threads, since active CUDA devices are determined by the CUDA runtime on a
// per-thread basis.
template <typename T>
class CudaBuffer {
 public:
  // Creates an empty buffer; no GPU memory is allocated until Reserve() (or a
  // copy method, which calls Reserve()) is invoked.
  explicit CudaBuffer(ContextImpl* context) : context_(context) {}

  // Creates a buffer and eagerly allocates room for `size` items of type T.
  CudaBuffer(ContextImpl* context, int size) : context_(context) {
    Reserve(size);
  }

  // Move construction transfers ownership of the GPU allocation; the
  // moved-from buffer is left empty but still tied to its context.
  CudaBuffer(CudaBuffer&& other)
      : data_(other.data_), size_(other.size_), context_(other.context_) {
    other.data_ = nullptr;
    other.size_ = 0;
  }

  // Move assignment frees any existing allocation, then takes ownership of
  // other's allocation and context. (Previously implicitly deleted because a
  // move constructor and destructor are user-declared; adding it cannot break
  // existing callers.)
  CudaBuffer& operator=(CudaBuffer&& other) {
    if (this != &other) {
      if (data_ != nullptr) {
        CHECK_EQ(cudaFree(data_), cudaSuccess);
      }
      data_ = other.data_;
      size_ = other.size_;
      context_ = other.context_;
      other.data_ = nullptr;
      other.size_ = 0;
    }
    return *this;
  }

  CudaBuffer(const CudaBuffer&) = delete;
  CudaBuffer& operator=(const CudaBuffer&) = delete;

  ~CudaBuffer() {
    if (data_ != nullptr) {
      CHECK_EQ(cudaFree(data_), cudaSuccess);
    }
  }

  // Grow the GPU memory buffer if needed to accommodate data of the specified
  // size (in items of type T). NOTE: when the buffer grows, the previous
  // contents are NOT preserved -- the old allocation is freed before the new
  // one is made. Shrink requests are ignored.
  void Reserve(const size_t size) {
    if (size > size_) {
      if (data_ != nullptr) {
        CHECK_EQ(cudaFree(data_), cudaSuccess);
      }
      CHECK_EQ(cudaMalloc(&data_, size * sizeof(T)), cudaSuccess)
          << "Failed to allocate " << size * sizeof(T)
          << " bytes of GPU memory";
      size_ = size;
    }
  }

  // Perform an asynchronous copy of `size` items from CPU memory to the GPU
  // memory managed by this CudaBuffer instance, on the context's default
  // stream. The buffer grows if needed. The caller must not free or reuse
  // `data` until the copy has completed (e.g. after synchronizing the stream).
  void CopyFromCpu(const T* data, const size_t size) {
    Reserve(size);
    CHECK_EQ(cudaMemcpyAsync(data_,
                             data,
                             size * sizeof(T),
                             cudaMemcpyHostToDevice,
                             context_->DefaultStream()),
             cudaSuccess);
  }

  // Perform an asynchronous copy from a vector in CPU memory to the GPU
  // memory managed by this CudaBuffer instance, on the context's default
  // stream. The vector must outlive the copy (see CopyFromCpu).
  void CopyFromCpuVector(const std::vector<T>& data) {
    Reserve(data.size());
    CHECK_EQ(cudaMemcpyAsync(data_,
                             data.data(),
                             data.size() * sizeof(T),
                             cudaMemcpyHostToDevice,
                             context_->DefaultStream()),
             cudaSuccess);
  }

  // Perform an asynchronous device-to-device copy of `size` items from
  // another GPU memory array into the GPU memory managed by this CudaBuffer
  // instance, on the context's default stream. The buffer grows if needed.
  void CopyFromGPUArray(const T* data, const size_t size) {
    Reserve(size);
    CHECK_EQ(cudaMemcpyAsync(data_,
                             data,
                             size * sizeof(T),
                             cudaMemcpyDeviceToDevice,
                             context_->DefaultStream()),
             cudaSuccess);
  }

  // Copy `size` items from the GPU memory managed by this CudaBuffer instance
  // to CPU memory. It is the caller's responsibility to ensure that the CPU
  // memory pointer is valid, i.e. it is not null, and that it points to
  // memory of at least this->size() size. This method synchronizes the
  // context's default stream, so all previously dispatched GPU operations on
  // that stream have completed before it returns.
  void CopyToCpu(T* data, const size_t size) const {
    CHECK(data_ != nullptr);
    CHECK_EQ(cudaMemcpyAsync(data,
                             data_,
                             size * sizeof(T),
                             cudaMemcpyDeviceToHost,
                             context_->DefaultStream()),
             cudaSuccess);
    CHECK_EQ(cudaStreamSynchronize(context_->DefaultStream()), cudaSuccess);
  }

  // Copy n items from another CudaBuffer's GPU memory into the GPU memory
  // managed by this CudaBuffer instance, growing this buffer's size if
  // needed. The copy is asynchronous and runs on the context's default
  // stream.
  void CopyNItemsFrom(int n, const CudaBuffer<T>& other) {
    Reserve(n);
    CHECK(other.data_ != nullptr);
    CHECK(data_ != nullptr);
    // BUGFIX: copy exactly n items, not size_ items. Reserve() only grows,
    // so size_ may exceed n (from an earlier, larger reservation); copying
    // size_ * sizeof(T) bytes would then read past the end of other's
    // allocation.
    CHECK_EQ(cudaMemcpyAsync(data_,
                             other.data_,
                             n * sizeof(T),
                             cudaMemcpyDeviceToDevice,
                             context_->DefaultStream()),
             cudaSuccess);
  }

  // Return a pointer to the GPU memory managed by this CudaBuffer instance
  // (nullptr if nothing has been allocated yet).
  T* data() { return data_; }
  const T* data() const { return data_; }

  // Return the number of items of type T that can fit in the GPU memory
  // allocated so far by this CudaBuffer instance.
  size_t size() const { return size_; }

 private:
  T* data_ = nullptr;       // Device pointer owned by this buffer.
  size_t size_ = 0;         // Capacity in items of T, not bytes.
  ContextImpl* context_ = nullptr;  // Not owned; supplies the default stream.
};

}  // namespace ceres::internal

#endif  // CERES_NO_CUDA
#endif  // CERES_INTERNAL_CUDA_BUFFER_H_
 |