123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2023 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_sparse_matrix.h"

#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "ceres/block_sparse_matrix.h"
#include "ceres/casts.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_least_squares_problems.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
- namespace ceres {
- namespace internal {
- #ifndef CERES_NO_CUDA
- class CudaSparseMatrixTest : public ::testing::Test {
- protected:
- void SetUp() final {
- std::string message;
- CHECK(context_.InitCuda(&message))
- << "InitCuda() failed because: " << message;
- std::unique_ptr<LinearLeastSquaresProblem> problem =
- CreateLinearLeastSquaresProblemFromId(2);
- CHECK(problem != nullptr);
- A_.reset(down_cast<BlockSparseMatrix*>(problem->A.release()));
- CHECK(A_ != nullptr);
- CHECK(problem->b != nullptr);
- CHECK(problem->x != nullptr);
- b_.resize(A_->num_rows());
- for (int i = 0; i < A_->num_rows(); ++i) {
- b_[i] = problem->b[i];
- }
- x_.resize(A_->num_cols());
- for (int i = 0; i < A_->num_cols(); ++i) {
- x_[i] = problem->x[i];
- }
- CHECK_EQ(A_->num_rows(), b_.rows());
- CHECK_EQ(A_->num_cols(), x_.rows());
- }
- std::unique_ptr<BlockSparseMatrix> A_;
- Vector x_;
- Vector b_;
- ContextImpl context_;
- };
- TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) {
- std::string message;
- auto A_crs = A_->ToCompressedRowSparseMatrix();
- CudaSparseMatrix A_gpu(&context_, *A_crs);
- CudaVector x_gpu(&context_, A_gpu.num_cols());
- CudaVector res_gpu(&context_, A_gpu.num_rows());
- x_gpu.CopyFromCpu(x_);
- const Vector minus_b = -b_;
- // res = -b
- res_gpu.CopyFromCpu(minus_b);
- // res += A * x
- A_gpu.RightMultiplyAndAccumulate(x_gpu, &res_gpu);
- Vector res;
- res_gpu.CopyTo(&res);
- Vector res_expected = minus_b;
- A_->RightMultiplyAndAccumulate(x_.data(), res_expected.data());
- EXPECT_LE((res - res_expected).norm(),
- std::numeric_limits<double>::epsilon() * 1e3);
- }
- TEST(CudaSparseMatrix, CopyValuesFromCpu) {
- // A1:
- // [ 1 1 0 0
- // 0 1 1 0]
- // A2:
- // [ 1 2 0 0
- // 0 3 4 0]
- // b: [1 2 3 4]'
- // A1 * b = [3 5]'
- // A2 * b = [5 18]'
- TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1});
- TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
- Vector b(4);
- b << 1, 2, 3, 4;
- ContextImpl context;
- std::string message;
- CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
- auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
- CudaSparseMatrix A_gpu(&context, *A1_crs);
- CudaVector b_gpu(&context, A1.num_cols());
- CudaVector x_gpu(&context, A1.num_rows());
- b_gpu.CopyFromCpu(b);
- x_gpu.SetZero();
- Vector x_expected(2);
- x_expected << 3, 5;
- A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
- Vector x_computed;
- x_gpu.CopyTo(&x_computed);
- EXPECT_EQ(x_computed, x_expected);
- auto A2_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A2);
- A_gpu.CopyValuesFromCpu(*A2_crs);
- x_gpu.SetZero();
- x_expected << 5, 18;
- A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
- x_gpu.CopyTo(&x_computed);
- EXPECT_EQ(x_computed, x_expected);
- }
- TEST(CudaSparseMatrix, RightMultiplyAndAccumulate) {
- // A:
- // [ 1 2 0 0
- // 0 3 4 0]
- // b: [1 2 3 4]'
- // A * b = [5 18]'
- TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
- Vector b(4);
- b << 1, 2, 3, 4;
- Vector x_expected(2);
- x_expected << 5, 18;
- ContextImpl context;
- std::string message;
- CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
- auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
- CudaSparseMatrix A_gpu(&context, *A_crs);
- CudaVector b_gpu(&context, A.num_cols());
- CudaVector x_gpu(&context, A.num_rows());
- b_gpu.CopyFromCpu(b);
- x_gpu.SetZero();
- A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
- Vector x_computed;
- x_gpu.CopyTo(&x_computed);
- EXPECT_EQ(x_computed, x_expected);
- }
- TEST(CudaSparseMatrix, LeftMultiplyAndAccumulate) {
- // A:
- // [ 1 2 0 0
- // 0 3 4 0]
- // b: [1 2]'
- // A'* b = [1 8 8 0]'
- TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
- Vector b(2);
- b << 1, 2;
- Vector x_expected(4);
- x_expected << 1, 8, 8, 0;
- ContextImpl context;
- std::string message;
- CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
- auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
- CudaSparseMatrix A_gpu(&context, *A_crs);
- CudaVector b_gpu(&context, A.num_rows());
- CudaVector x_gpu(&context, A.num_cols());
- b_gpu.CopyFromCpu(b);
- x_gpu.SetZero();
- A_gpu.LeftMultiplyAndAccumulate(b_gpu, &x_gpu);
- Vector x_computed;
- x_gpu.CopyTo(&x_computed);
- EXPECT_EQ(x_computed, x_expected);
- }
- // If there are numerical errors due to synchronization issues, they will show
- // up when testing with large matrices, since each operation will take
- // significant time, thus hopefully revealing any potential synchronization
- // issues.
- TEST(CudaSparseMatrix, LargeMultiplyAndAccumulate) {
- // Create a large NxN matrix A that has the following structure:
- // In row i, only columns i and i+1 are non-zero.
- // A_{i, i} = A_{i, i+1} = 1.
- // There will be 2 * N - 1 non-zero elements in A.
- // X = [1:N]
- // Right multiply test:
- // b = A * X
- // Left multiply test:
- // b = A' * X
- const int N = 10 * 1000 * 1000;
- const int num_non_zeros = 2 * N - 1;
- std::vector<int> row_indices(num_non_zeros);
- std::vector<int> col_indices(num_non_zeros);
- std::vector<double> values(num_non_zeros);
- for (int i = 0; i < N; ++i) {
- row_indices[2 * i] = i;
- col_indices[2 * i] = i;
- values[2 * i] = 1.0;
- if (i + 1 < N) {
- col_indices[2 * i + 1] = i + 1;
- row_indices[2 * i + 1] = i;
- values[2 * i + 1] = 1;
- }
- }
- TripletSparseMatrix A(N, N, row_indices, col_indices, values);
- Vector x(N);
- for (int i = 0; i < N; ++i) {
- x[i] = i + 1;
- }
- ContextImpl context;
- std::string message;
- CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
- auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
- CudaSparseMatrix A_gpu(&context, *A_crs);
- CudaVector b_gpu(&context, N);
- CudaVector x_gpu(&context, N);
- x_gpu.CopyFromCpu(x);
- // First check RightMultiply.
- {
- b_gpu.SetZero();
- A_gpu.RightMultiplyAndAccumulate(x_gpu, &b_gpu);
- Vector b_computed;
- b_gpu.CopyTo(&b_computed);
- for (int i = 0; i < N; ++i) {
- if (i + 1 < N) {
- EXPECT_EQ(b_computed[i], 2 * (i + 1) + 1);
- } else {
- EXPECT_EQ(b_computed[i], i + 1);
- }
- }
- }
- // Next check LeftMultiply.
- {
- b_gpu.SetZero();
- A_gpu.LeftMultiplyAndAccumulate(x_gpu, &b_gpu);
- Vector b_computed;
- b_gpu.CopyTo(&b_computed);
- for (int i = 0; i < N; ++i) {
- if (i > 0) {
- EXPECT_EQ(b_computed[i], 2 * (i + 1) - 1);
- } else {
- EXPECT_EQ(b_computed[i], i + 1);
- }
- }
- }
- }
- #endif // CERES_NO_CUDA
- } // namespace internal
- } // namespace ceres
|