cuda_block_sparse_crs_view_test.cc 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
  30. #include "ceres/cuda_block_sparse_crs_view.h"
  31. #include <glog/logging.h>
  32. #include <gtest/gtest.h>
  33. #include <numeric>
  34. #ifndef CERES_NO_CUDA
  35. namespace ceres::internal {
  36. class CudaBlockSparseCRSViewTest : public ::testing::Test {
  37. protected:
  38. void SetUp() final {
  39. std::string message;
  40. CHECK(context_.InitCuda(&message))
  41. << "InitCuda() failed because: " << message;
  42. BlockSparseMatrix::RandomMatrixOptions options;
  43. options.num_row_blocks = 1234;
  44. options.min_row_block_size = 1;
  45. options.max_row_block_size = 10;
  46. options.num_col_blocks = 567;
  47. options.min_col_block_size = 1;
  48. options.max_col_block_size = 10;
  49. options.block_density = 0.2;
  50. std::mt19937 rng;
  51. // Block-sparse matrix with order of values different from CRS
  52. block_sparse_non_crs_compatible_ =
  53. BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
  54. std::iota(block_sparse_non_crs_compatible_->mutable_values(),
  55. block_sparse_non_crs_compatible_->mutable_values() +
  56. block_sparse_non_crs_compatible_->num_nonzeros(),
  57. 1);
  58. options.max_row_block_size = 1;
  59. // Block-sparse matrix with CRS order of values (row-blocks are rows)
  60. block_sparse_crs_compatible_rows_ =
  61. BlockSparseMatrix::CreateRandomMatrix(options, rng, true);
  62. std::iota(block_sparse_crs_compatible_rows_->mutable_values(),
  63. block_sparse_crs_compatible_rows_->mutable_values() +
  64. block_sparse_crs_compatible_rows_->num_nonzeros(),
  65. 1);
  66. // Block-sparse matrix with CRS order of values (single cell per row-block)
  67. auto bs = std::make_unique<CompressedRowBlockStructure>(
  68. *block_sparse_non_crs_compatible_->block_structure());
  69. int num_nonzeros = 0;
  70. for (auto& r : bs->rows) {
  71. const int num_cells = r.cells.size();
  72. if (num_cells > 1) {
  73. std::uniform_int_distribution<int> uniform_cell(0, num_cells - 1);
  74. const int selected_cell = uniform_cell(rng);
  75. std::swap(r.cells[0], r.cells[selected_cell]);
  76. r.cells.resize(1);
  77. }
  78. const int row_block_size = r.block.size;
  79. for (auto& c : r.cells) {
  80. c.position = num_nonzeros;
  81. const int col_block_size = bs->cols[c.block_id].size;
  82. num_nonzeros += col_block_size * row_block_size;
  83. }
  84. }
  85. block_sparse_crs_compatible_single_cell_ =
  86. std::make_unique<BlockSparseMatrix>(bs.release());
  87. std::iota(block_sparse_crs_compatible_single_cell_->mutable_values(),
  88. block_sparse_crs_compatible_single_cell_->mutable_values() +
  89. block_sparse_crs_compatible_single_cell_->num_nonzeros(),
  90. 1);
  91. }
  92. void Compare(const BlockSparseMatrix& bsm, const CudaSparseMatrix& csm) {
  93. ASSERT_EQ(csm.num_cols(), bsm.num_cols());
  94. ASSERT_EQ(csm.num_rows(), bsm.num_rows());
  95. ASSERT_EQ(csm.num_nonzeros(), bsm.num_nonzeros());
  96. const int num_rows = bsm.num_rows();
  97. const int num_cols = bsm.num_cols();
  98. Vector x(num_cols);
  99. Vector y(num_rows);
  100. CudaVector x_cuda(&context_, num_cols);
  101. CudaVector y_cuda(&context_, num_rows);
  102. Vector y_cuda_host(num_rows);
  103. for (int i = 0; i < num_cols; ++i) {
  104. x.setZero();
  105. y.setZero();
  106. y_cuda.SetZero();
  107. x[i] = 1.;
  108. x_cuda.CopyFromCpu(x);
  109. csm.RightMultiplyAndAccumulate(x_cuda, &y_cuda);
  110. bsm.RightMultiplyAndAccumulate(
  111. x.data(), y.data(), &context_, std::thread::hardware_concurrency());
  112. y_cuda.CopyTo(&y_cuda_host);
  113. // There will be up to 1 non-zero product per row, thus we expect an exact
  114. // match on 32-bit integer indices
  115. EXPECT_EQ((y - y_cuda_host).squaredNorm(), 0.);
  116. }
  117. }
  118. std::unique_ptr<BlockSparseMatrix> block_sparse_non_crs_compatible_;
  119. std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_rows_;
  120. std::unique_ptr<BlockSparseMatrix> block_sparse_crs_compatible_single_cell_;
  121. ContextImpl context_;
  122. };
  123. TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesNonCompatible) {
  124. auto view =
  125. CudaBlockSparseCRSView(*block_sparse_non_crs_compatible_, &context_);
  126. ASSERT_EQ(view.IsCrsCompatible(), false);
  127. auto matrix = view.crs_matrix();
  128. Compare(*block_sparse_non_crs_compatible_, *matrix);
  129. }
  130. TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleRows) {
  131. auto view =
  132. CudaBlockSparseCRSView(*block_sparse_crs_compatible_rows_, &context_);
  133. ASSERT_EQ(view.IsCrsCompatible(), true);
  134. auto matrix = view.crs_matrix();
  135. Compare(*block_sparse_crs_compatible_rows_, *matrix);
  136. }
  137. TEST_F(CudaBlockSparseCRSViewTest, CreateUpdateValuesCompatibleSingleCell) {
  138. auto view = CudaBlockSparseCRSView(*block_sparse_crs_compatible_single_cell_,
  139. &context_);
  140. ASSERT_EQ(view.IsCrsCompatible(), true);
  141. auto matrix = view.crs_matrix();
  142. Compare(*block_sparse_crs_compatible_single_cell_, *matrix);
  143. }
  144. } // namespace ceres::internal
  145. #endif // CERES_NO_CUDA