// SRI-DYZBC2 / Vehicle-cpp
// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
//

#ifndef CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_
#define CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_

#include "ceres/internal/config.h"

#ifndef CERES_NO_CUDA

#include <memory>

#include "ceres/block_sparse_matrix.h"
#include "ceres/cuda_block_structure.h"
#include "ceres/cuda_buffer.h"
#include "ceres/cuda_sparse_matrix.h"
#include "ceres/cuda_streamed_buffer.h"

namespace ceres::internal {
// We use cuSPARSE library for SpMV operations. However, it supports neither
// block-sparse format with varying size of the blocks nor submatrix-vector
// products. Thus, we perform the following operations in order to compute
// products of partitioned block-sparse matrices and dense vectors on the
// GPU:
//  - Once per block-sparse structure update:
//    - Compute CRS structures of left and right submatrices from block-sparse
//    structure
//    - Check if values of F sub-matrix can be copied without permutation
//    matrices
//  - Once per block-sparse values update:
//    - Copy values of E sub-matrix
//    - Permute or copy values of F sub-matrix
//
// It is assumed that cells of block-sparse matrix are laid out sequentially in
// both of sub-matrices and there is exactly one cell in row-block of E
// sub-matrix in the first num_row_blocks_e_ row blocks, and no cells in E
// sub-matrix below num_row_blocks_e_ row blocks.
//
// This class avoids storing both CRS and block-sparse values in GPU memory.
// Instead, block-sparse values are transferred to GPU memory as a disjoint set
// of small contiguous segments with simultaneous permutation of the values into
// correct order using block-structure.
class CERES_NO_EXPORT CudaPartitionedBlockSparseCRSView {
 public:
  // Initializes the internal CRS sub-matrices and the GPU-side copy of the
  // block-sparse structure from bsm. The following objects are stored in GPU
  // memory for the whole lifetime of the object:
  //  - matrix_e_: left CRS submatrix
  //  - matrix_f_: right CRS submatrix
  //  - block_structure_: copy of block-sparse structure on GPU
  //  - streamed_buffer_: helper for value updating
  //
  // NOTE(review): num_col_blocks_e presumably is the number of leading column
  // blocks that form the left (E) sub-matrix, and context is presumably
  // expected to outlive this object since only a raw pointer is kept -
  // confirm both against the implementation.
  CudaPartitionedBlockSparseCRSView(const BlockSparseMatrix& bsm,
                                    const int num_col_blocks_e,
                                    ContextImpl* context);

  // Update values of CRS submatrices using values of block-sparse matrix.
  // Assumes that bsm has the same block-sparse structure as matrix that was
  // used for construction.
  void UpdateValues(const BlockSparseMatrix& bsm);

  // Non-owning access to the left (E) and right (F) CRS sub-matrices. The
  // returned pointers refer to objects owned by this view.
  const CudaSparseMatrix* matrix_e() const { return matrix_e_.get(); }
  const CudaSparseMatrix* matrix_f() const { return matrix_f_.get(); }
  CudaSparseMatrix* mutable_matrix_e() { return matrix_e_.get(); }
  CudaSparseMatrix* mutable_matrix_f() { return matrix_f_.get(); }

 private:
  // Value permutation kernel performs a single element-wise operation per
  // thread, thus performing permutation in blocks of 8 megabytes of
  // block-sparse values seems reasonable.
  // NOTE(review): the constant is 1M; this matches 8 megabytes if it counts
  // 8-byte double values (the element type of streamed_buffer_) rather than
  // bytes - confirm against the implementation.
  static constexpr int kMaxTemporaryArraySize = 1 * 1024 * 1024;
  // Left (E) CRS sub-matrix.
  std::unique_ptr<CudaSparseMatrix> matrix_e_;
  // Right (F) CRS sub-matrix.
  std::unique_ptr<CudaSparseMatrix> matrix_f_;
  // Helper for value updating.
  std::unique_ptr<CudaStreamedBuffer<double>> streamed_buffer_;
  // Copy of the block-sparse structure on GPU.
  std::unique_ptr<CudaBlockSparseStructure> block_structure_;
  // The members below carry default initializers so that they never hold an
  // indeterminate value, regardless of what the out-of-line constructor sets.
  //
  // Presumably records whether values of the F sub-matrix can be copied
  // without permutation - confirm against the implementation.
  bool f_is_crs_compatible_ = false;
  // Presumably the number of leading row blocks that contain a cell of the E
  // sub-matrix - confirm against the implementation.
  int num_row_blocks_e_ = 0;
  // Non-owning pointer to the context passed at construction.
  ContextImpl* context_ = nullptr;
};

}  // namespace ceres::internal

#endif  // CERES_NO_CUDA
#endif  // CERES_INTERNAL_CUDA_PARTITIONED_BLOCK_SPARSE_CRS_VIEW_H_