123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2023 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
- #include "ceres/cuda_partitioned_block_sparse_crs_view.h"
- #ifndef CERES_NO_CUDA
- #include "ceres/cuda_block_structure.h"
- #include "ceres/cuda_kernels_bsm_to_crs.h"
- namespace ceres::internal {
- CudaPartitionedBlockSparseCRSView::CudaPartitionedBlockSparseCRSView(
- const BlockSparseMatrix& bsm,
- const int num_col_blocks_e,
- ContextImpl* context)
- :
- context_(context) {
- const auto& bs = *bsm.block_structure();
- block_structure_ =
- std::make_unique<CudaBlockSparseStructure>(bs, num_col_blocks_e, context);
- // Determine number of non-zeros in left submatrix
- // Row-blocks are at least 1 row high, thus we can use a temporary array of
- // num_rows for ComputeNonZerosInColumnBlockSubMatrix; and later reuse it for
- // FillCRSStructurePartitioned
- const int num_rows = bsm.num_rows();
- const int num_nonzeros_e = block_structure_->num_nonzeros_e();
- const int num_nonzeros_f = bsm.num_nonzeros() - num_nonzeros_e;
- const int num_cols_e = num_col_blocks_e < bs.cols.size()
- ? bs.cols[num_col_blocks_e].position
- : bsm.num_cols();
- const int num_cols_f = bsm.num_cols() - num_cols_e;
- CudaBuffer<int32_t> rows_e(context, num_rows + 1);
- CudaBuffer<int32_t> cols_e(context, num_nonzeros_e);
- CudaBuffer<int32_t> rows_f(context, num_rows + 1);
- CudaBuffer<int32_t> cols_f(context, num_nonzeros_f);
- num_row_blocks_e_ = block_structure_->num_row_blocks_e();
- FillCRSStructurePartitioned(block_structure_->num_row_blocks(),
- num_rows,
- num_row_blocks_e_,
- num_col_blocks_e,
- num_nonzeros_e,
- block_structure_->first_cell_in_row_block(),
- block_structure_->cells(),
- block_structure_->row_blocks(),
- block_structure_->col_blocks(),
- rows_e.data(),
- cols_e.data(),
- rows_f.data(),
- cols_f.data(),
- context->DefaultStream(),
- context->is_cuda_memory_pools_supported_);
- f_is_crs_compatible_ = block_structure_->IsCrsCompatible();
- if (f_is_crs_compatible_) {
- block_structure_ = nullptr;
- } else {
- streamed_buffer_ = std::make_unique<CudaStreamedBuffer<double>>(
- context, kMaxTemporaryArraySize);
- }
- matrix_e_ = std::make_unique<CudaSparseMatrix>(
- num_cols_e, std::move(rows_e), std::move(cols_e), context);
- matrix_f_ = std::make_unique<CudaSparseMatrix>(
- num_cols_f, std::move(rows_f), std::move(cols_f), context);
- CHECK_EQ(bsm.num_nonzeros(),
- matrix_e_->num_nonzeros() + matrix_f_->num_nonzeros());
- UpdateValues(bsm);
- }
- void CudaPartitionedBlockSparseCRSView::UpdateValues(
- const BlockSparseMatrix& bsm) {
- if (f_is_crs_compatible_) {
- CHECK_EQ(cudaSuccess,
- cudaMemcpyAsync(matrix_e_->mutable_values(),
- bsm.values(),
- matrix_e_->num_nonzeros() * sizeof(double),
- cudaMemcpyHostToDevice,
- context_->DefaultStream()));
- CHECK_EQ(cudaSuccess,
- cudaMemcpyAsync(matrix_f_->mutable_values(),
- bsm.values() + matrix_e_->num_nonzeros(),
- matrix_f_->num_nonzeros() * sizeof(double),
- cudaMemcpyHostToDevice,
- context_->DefaultStream()));
- return;
- }
- streamed_buffer_->CopyToGpu(
- bsm.values(),
- bsm.num_nonzeros(),
- [block_structure = block_structure_.get(),
- num_nonzeros_e = matrix_e_->num_nonzeros(),
- num_row_blocks_e = num_row_blocks_e_,
- values_f = matrix_f_->mutable_values(),
- rows_f = matrix_f_->rows()](
- const double* values, int num_values, int offset, auto stream) {
- PermuteToCRSPartitionedF(num_nonzeros_e + offset,
- num_values,
- block_structure->num_row_blocks(),
- num_row_blocks_e,
- block_structure->first_cell_in_row_block(),
- block_structure->value_offset_row_block_f(),
- block_structure->cells(),
- block_structure->row_blocks(),
- block_structure->col_blocks(),
- rows_f,
- values,
- values_f,
- stream);
- });
- CHECK_EQ(cudaSuccess,
- cudaMemcpyAsync(matrix_e_->mutable_values(),
- bsm.values(),
- matrix_e_->num_nonzeros() * sizeof(double),
- cudaMemcpyHostToDevice,
- context_->DefaultStream()));
- }
- } // namespace ceres::internal
- #endif // CERES_NO_CUDA
|