SRI-DYZBC2
/
Vehicle-cpp


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
							// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)

#include "ceres/cuda_block_sparse_crs_view.h"

#include <glog/logging.h>
#include <gtest/gtest.h>

#include <numeric>

#ifndef CERES_NO_CUDA

namespace ceres::internal {
class CudaBlockSparseCRSViewTest : public ::testing::Test {
 protected:
  void SetUp() final {
    std::string message;
    CHECK(context_.InitCuda(&message))
        << "InitCuda() failed because: " << message;

    BlockSparseMatrix::RandomMatrixOptions options;
    options.num_row_blocks = 1234;
    options.min_row_block_size = 1;
    options.max_row_block_size = 10;
    options.num_col_blocks = 567;
    options.min_col_block_size = 1;
    options.max_col_block_size = 10;
    options.block_density = 0.2;
    std::mt19937 rng;

    // Block-sparse matrix with order of values different from CRS
    block_sparse_non_crs_compatible_ =
        BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
    std::iota(block_sparse_non_crs_compatible_->mutable_values(),
              block_sparse_non_crs_compatible_->mutable_values() +
                  block_sparse_non_crs_compatible_->num_nonzeros(),
              1);

    options.max_row_block_size = 1;
    // Block-sparse matrix with CRS order of values (row-blocks are rows)
    block_sparse_crs_compatible_rows_ =
        BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
    std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
              block_sparse_crs_compatible_rows_->mutable_values() +
                  block_sparse_crs_compatible_rows_->num_nonzeros(),
              1);
    // Block-sparse matrix with CRS order of values (single cell per row-block)
    auto bs = std::make_unique<CompressedRowBlockStructure>(
        *block_sparse_non_crs_compatible_->block_structure());

    int num_nonzeros = 0;
    for (auto& r : bs->rows) {
      const int num_cells = r.cells.size();
      if (num_cells > 1) {
        std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
        const int selected_cell = uniform_cell(rng);
        std::swap(r.cells[0], r.cells[selected_cell]);
        r.cells.resize(1);
      }
      const int row_block_size = r.block.size;
      for (auto& c : r.cells) {
        c.position = num_nonzeros;
        const int col_block_size = bs->cols[c.block_id].size;
        num_nonzeros += col_block_size * row_block_size;
      }
    }
    block_sparse_crs_compatible_single_cell_ =
        std::make_unique<BlockSparseMatrix>(bs.release());
    std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
              block_sparse_crs_compatible_single_cell_->mutable_values() +
                  block_sparse_crs_compatible_single_cell_->num_nonzeros(),
              1);
  }

  void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
    ASSERT_EQ(csm.num_cols(), bsm.num_cols());
    ASSERT_EQ(csm.num_rows(), bsm.num_rows());
    ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
    const int num_rows = bsm.num_rows();
    const int num_cols = bsm.num_cols();
    Vector x(num_cols);
    Vector y(num_rows);
    CudaVector x_cuda(&context_, num_cols);
    CudaVector y_cuda(&context_, num_rows);
    Vector y_cuda_host(num_rows);

    for (int i = 0; i < num_cols; ++i) {
      x.setZero();
      y.setZero();
      y_cuda.SetZero();
      x[i] = 1.;
      x_cuda.CopyFromCpu(x);
      csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
      bsm.RightMultiplyAndAccumulate(
          x.data(), y.data(), &context_, std::thread::hardware_concurrency());
      y_cuda.CopyTo(&y_cuda_host);
      // There will be up to 1 non-zero product per row, thus we expect an exact
      // match on 32-bit integer indices
      EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
    }
  }

  std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
  std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
  std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
  ContextImpl context_;
};

TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
  auto view =
      CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_);
  ASSERT_EQ(view.IsCrsCompatible(), false);

  auto matrix = view.crs_matrix();
  Compare(*block_sparse_non_crs_compatible_, *matrix);
}

TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
  auto view =
      CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_);
  ASSERT_EQ(view.IsCrsCompatible(), true);

  auto matrix = view.crs_matrix();
  Compare(*block_sparse_crs_compatible_rows_, *matrix);
}

TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
  auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_,
                                     &context_);
  ASSERT_EQ(view.IsCrsCompatible(), true);

  auto matrix = view.crs_matrix();
  Compare(*block_sparse_crs_compatible_single_cell_, *matrix);
}
}  // namespace ceres::internal

#endif  // CERES_NO_CUDA