// File: cuda_sparse_matrix_test.cc
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
#include "ceres/cuda_sparse_matrix.h"

#include <limits>
#include <memory>
#include <string>
#include <vector>

#include "ceres/block_sparse_matrix.h"
#include "ceres/casts.h"
#include "ceres/cuda_vector.h"
#include "ceres/internal/config.h"
#include "ceres/internal/eigen.h"
#include "ceres/linear_least_squares_problems.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"
#include "gtest/gtest.h"
  41. namespace ceres {
  42. namespace internal {
  43. #ifndef CERES_NO_CUDA
  44. class CudaSparseMatrixTest : public ::testing::Test {
  45. protected:
  46. void SetUp() final {
  47. std::string message;
  48. CHECK(context_.InitCuda(&message))
  49. << "InitCuda() failed because: " << message;
  50. std::unique_ptr<LinearLeastSquaresProblem> problem =
  51. CreateLinearLeastSquaresProblemFromId(2);
  52. CHECK(problem != nullptr);
  53. A_.reset(down_cast<BlockSparseMatrix*>(problem->A.release()));
  54. CHECK(A_ != nullptr);
  55. CHECK(problem->b != nullptr);
  56. CHECK(problem->x != nullptr);
  57. b_.resize(A_->num_rows());
  58. for (int i = 0; i < A_->num_rows(); ++i) {
  59. b_[i] = problem->b[i];
  60. }
  61. x_.resize(A_->num_cols());
  62. for (int i = 0; i < A_->num_cols(); ++i) {
  63. x_[i] = problem->x[i];
  64. }
  65. CHECK_EQ(A_->num_rows(), b_.rows());
  66. CHECK_EQ(A_->num_cols(), x_.rows());
  67. }
  68. std::unique_ptr<BlockSparseMatrix> A_;
  69. Vector x_;
  70. Vector b_;
  71. ContextImpl context_;
  72. };
  73. TEST_F(CudaSparseMatrixTest, RightMultiplyAndAccumulate) {
  74. std::string message;
  75. auto A_crs = A_->ToCompressedRowSparseMatrix();
  76. CudaSparseMatrix A_gpu(&context_, *A_crs);
  77. CudaVector x_gpu(&context_, A_gpu.num_cols());
  78. CudaVector res_gpu(&context_, A_gpu.num_rows());
  79. x_gpu.CopyFromCpu(x_);
  80. const Vector minus_b = -b_;
  81. // res = -b
  82. res_gpu.CopyFromCpu(minus_b);
  83. // res += A * x
  84. A_gpu.RightMultiplyAndAccumulate(x_gpu, &res_gpu);
  85. Vector res;
  86. res_gpu.CopyTo(&res);
  87. Vector res_expected = minus_b;
  88. A_->RightMultiplyAndAccumulate(x_.data(), res_expected.data());
  89. EXPECT_LE((res - res_expected).norm(),
  90. std::numeric_limits<double>::epsilon() * 1e3);
  91. }
  92. TEST(CudaSparseMatrix, CopyValuesFromCpu) {
  93. // A1:
  94. // [ 1 1 0 0
  95. // 0 1 1 0]
  96. // A2:
  97. // [ 1 2 0 0
  98. // 0 3 4 0]
  99. // b: [1 2 3 4]'
  100. // A1 * b = [3 5]'
  101. // A2 * b = [5 18]'
  102. TripletSparseMatrix A1(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 1, 1, 1});
  103. TripletSparseMatrix A2(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  104. Vector b(4);
  105. b << 1, 2, 3, 4;
  106. ContextImpl context;
  107. std::string message;
  108. CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
  109. auto A1_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A1);
  110. CudaSparseMatrix A_gpu(&context, *A1_crs);
  111. CudaVector b_gpu(&context, A1.num_cols());
  112. CudaVector x_gpu(&context, A1.num_rows());
  113. b_gpu.CopyFromCpu(b);
  114. x_gpu.SetZero();
  115. Vector x_expected(2);
  116. x_expected << 3, 5;
  117. A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
  118. Vector x_computed;
  119. x_gpu.CopyTo(&x_computed);
  120. EXPECT_EQ(x_computed, x_expected);
  121. auto A2_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A2);
  122. A_gpu.CopyValuesFromCpu(*A2_crs);
  123. x_gpu.SetZero();
  124. x_expected << 5, 18;
  125. A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
  126. x_gpu.CopyTo(&x_computed);
  127. EXPECT_EQ(x_computed, x_expected);
  128. }
  129. TEST(CudaSparseMatrix, RightMultiplyAndAccumulate) {
  130. // A:
  131. // [ 1 2 0 0
  132. // 0 3 4 0]
  133. // b: [1 2 3 4]'
  134. // A * b = [5 18]'
  135. TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  136. Vector b(4);
  137. b << 1, 2, 3, 4;
  138. Vector x_expected(2);
  139. x_expected << 5, 18;
  140. ContextImpl context;
  141. std::string message;
  142. CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
  143. auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
  144. CudaSparseMatrix A_gpu(&context, *A_crs);
  145. CudaVector b_gpu(&context, A.num_cols());
  146. CudaVector x_gpu(&context, A.num_rows());
  147. b_gpu.CopyFromCpu(b);
  148. x_gpu.SetZero();
  149. A_gpu.RightMultiplyAndAccumulate(b_gpu, &x_gpu);
  150. Vector x_computed;
  151. x_gpu.CopyTo(&x_computed);
  152. EXPECT_EQ(x_computed, x_expected);
  153. }
  154. TEST(CudaSparseMatrix, LeftMultiplyAndAccumulate) {
  155. // A:
  156. // [ 1 2 0 0
  157. // 0 3 4 0]
  158. // b: [1 2]'
  159. // A'* b = [1 8 8 0]'
  160. TripletSparseMatrix A(2, 4, {0, 0, 1, 1}, {0, 1, 1, 2}, {1, 2, 3, 4});
  161. Vector b(2);
  162. b << 1, 2;
  163. Vector x_expected(4);
  164. x_expected << 1, 8, 8, 0;
  165. ContextImpl context;
  166. std::string message;
  167. CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
  168. auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
  169. CudaSparseMatrix A_gpu(&context, *A_crs);
  170. CudaVector b_gpu(&context, A.num_rows());
  171. CudaVector x_gpu(&context, A.num_cols());
  172. b_gpu.CopyFromCpu(b);
  173. x_gpu.SetZero();
  174. A_gpu.LeftMultiplyAndAccumulate(b_gpu, &x_gpu);
  175. Vector x_computed;
  176. x_gpu.CopyTo(&x_computed);
  177. EXPECT_EQ(x_computed, x_expected);
  178. }
  179. // If there are numerical errors due to synchronization issues, they will show
  180. // up when testing with large matrices, since each operation will take
  181. // significant time, thus hopefully revealing any potential synchronization
  182. // issues.
  183. TEST(CudaSparseMatrix, LargeMultiplyAndAccumulate) {
  184. // Create a large NxN matrix A that has the following structure:
  185. // In row i, only columns i and i+1 are non-zero.
  186. // A_{i, i} = A_{i, i+1} = 1.
  187. // There will be 2 * N - 1 non-zero elements in A.
  188. // X = [1:N]
  189. // Right multiply test:
  190. // b = A * X
  191. // Left multiply test:
  192. // b = A' * X
  193. const int N = 10 * 1000 * 1000;
  194. const int num_non_zeros = 2 * N - 1;
  195. std::vector<int> row_indices(num_non_zeros);
  196. std::vector<int> col_indices(num_non_zeros);
  197. std::vector<double> values(num_non_zeros);
  198. for (int i = 0; i < N; ++i) {
  199. row_indices[2 * i] = i;
  200. col_indices[2 * i] = i;
  201. values[2 * i] = 1.0;
  202. if (i + 1 < N) {
  203. col_indices[2 * i + 1] = i + 1;
  204. row_indices[2 * i + 1] = i;
  205. values[2 * i + 1] = 1;
  206. }
  207. }
  208. TripletSparseMatrix A(N, N, row_indices, col_indices, values);
  209. Vector x(N);
  210. for (int i = 0; i < N; ++i) {
  211. x[i] = i + 1;
  212. }
  213. ContextImpl context;
  214. std::string message;
  215. CHECK(context.InitCuda(&message)) << "InitCuda() failed because: " << message;
  216. auto A_crs = CompressedRowSparseMatrix::FromTripletSparseMatrix(A);
  217. CudaSparseMatrix A_gpu(&context, *A_crs);
  218. CudaVector b_gpu(&context, N);
  219. CudaVector x_gpu(&context, N);
  220. x_gpu.CopyFromCpu(x);
  221. // First check RightMultiply.
  222. {
  223. b_gpu.SetZero();
  224. A_gpu.RightMultiplyAndAccumulate(x_gpu, &b_gpu);
  225. Vector b_computed;
  226. b_gpu.CopyTo(&b_computed);
  227. for (int i = 0; i < N; ++i) {
  228. if (i + 1 < N) {
  229. EXPECT_EQ(b_computed[i], 2 * (i + 1) + 1);
  230. } else {
  231. EXPECT_EQ(b_computed[i], i + 1);
  232. }
  233. }
  234. }
  235. // Next check LeftMultiply.
  236. {
  237. b_gpu.SetZero();
  238. A_gpu.LeftMultiplyAndAccumulate(x_gpu, &b_gpu);
  239. Vector b_computed;
  240. b_gpu.CopyTo(&b_computed);
  241. for (int i = 0; i < N; ++i) {
  242. if (i > 0) {
  243. EXPECT_EQ(b_computed[i], 2 * (i + 1) - 1);
  244. } else {
  245. EXPECT_EQ(b_computed[i], i + 1);
  246. }
  247. }
  248. }
  249. }
  250. #endif // CERES_NO_CUDA
  251. } // namespace internal
  252. } // namespace ceres