cuda_dense_cholesky_test.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
  30. #include <string>
  31. #include "ceres/dense_cholesky.h"
  32. #include "ceres/internal/config.h"
  33. #include "ceres/internal/eigen.h"
  34. #include "glog/logging.h"
  35. #include "gtest/gtest.h"
  36. namespace ceres::internal {
  37. #ifndef CERES_NO_CUDA
  38. TEST(CUDADenseCholesky, InvalidOptionOnCreate) {
  39. LinearSolver::Options options;
  40. ContextImpl context;
  41. options.context = &context;
  42. std::string error;
  43. EXPECT_TRUE(context.InitCuda(&error)) << error;
  44. auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  45. EXPECT_EQ(dense_cuda_solver, nullptr);
  46. }
  47. // Tests the CUDA Cholesky solver with a simple 4x4 matrix.
  48. TEST(CUDADenseCholesky, Cholesky4x4Matrix) {
  49. Eigen::Matrix4d A;
  50. // clang-format off
  51. A << 4, 12, -16, 0,
  52. 12, 37, -43, 0,
  53. -16, -43, 98, 0,
  54. 0, 0, 0, 1;
  55. // clang-format on
  56. Vector b = Eigen::Vector4d::Ones();
  57. LinearSolver::Options options;
  58. ContextImpl context;
  59. options.context = &context;
  60. std::string error;
  61. EXPECT_TRUE(context.InitCuda(&error)) << error;
  62. options.dense_linear_algebra_library_type = CUDA;
  63. auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  64. ASSERT_NE(dense_cuda_solver, nullptr);
  65. std::string error_string;
  66. ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
  67. LinearSolverTerminationType::SUCCESS);
  68. Eigen::Vector4d x = Eigen::Vector4d::Zero();
  69. ASSERT_EQ(dense_cuda_solver->Solve(b.data(), x.data(), &error_string),
  70. LinearSolverTerminationType::SUCCESS);
  71. static const double kEpsilon = std::numeric_limits<double>::epsilon() * 10;
  72. const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
  73. EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
  74. EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
  75. EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
  76. EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
  77. }
  78. TEST(CUDADenseCholesky, SingularMatrix) {
  79. Eigen::Matrix3d A;
  80. // clang-format off
  81. A << 1, 0, 0,
  82. 0, 1, 0,
  83. 0, 0, 0;
  84. // clang-format on
  85. LinearSolver::Options options;
  86. ContextImpl context;
  87. options.context = &context;
  88. std::string error;
  89. EXPECT_TRUE(context.InitCuda(&error)) << error;
  90. options.dense_linear_algebra_library_type = CUDA;
  91. auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  92. ASSERT_NE(dense_cuda_solver, nullptr);
  93. std::string error_string;
  94. ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
  95. LinearSolverTerminationType::FAILURE);
  96. }
  97. TEST(CUDADenseCholesky, NegativeMatrix) {
  98. Eigen::Matrix3d A;
  99. // clang-format off
  100. A << 1, 0, 0,
  101. 0, 1, 0,
  102. 0, 0, -1;
  103. // clang-format on
  104. LinearSolver::Options options;
  105. ContextImpl context;
  106. options.context = &context;
  107. std::string error;
  108. EXPECT_TRUE(context.InitCuda(&error)) << error;
  109. options.dense_linear_algebra_library_type = CUDA;
  110. auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  111. ASSERT_NE(dense_cuda_solver, nullptr);
  112. std::string error_string;
  113. ASSERT_EQ(dense_cuda_solver->Factorize(A.cols(), A.data(), &error_string),
  114. LinearSolverTerminationType::FAILURE);
  115. }
  116. TEST(CUDADenseCholesky, MustFactorizeBeforeSolve) {
  117. const Eigen::Vector3d b = Eigen::Vector3d::Ones();
  118. LinearSolver::Options options;
  119. ContextImpl context;
  120. options.context = &context;
  121. std::string error;
  122. EXPECT_TRUE(context.InitCuda(&error)) << error;
  123. options.dense_linear_algebra_library_type = CUDA;
  124. auto dense_cuda_solver = CUDADenseCholesky::Create(options);
  125. ASSERT_NE(dense_cuda_solver, nullptr);
  126. std::string error_string;
  127. ASSERT_EQ(dense_cuda_solver->Solve(b.data(), nullptr, &error_string),
  128. LinearSolverTerminationType::FATAL_ERROR);
  129. }
  130. TEST(CUDADenseCholesky, Randomized1600x1600Tests) {
  131. const int kNumCols = 1600;
  132. using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
  133. using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  134. using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  135. LinearSolver::Options options;
  136. ContextImpl context;
  137. options.context = &context;
  138. std::string error;
  139. EXPECT_TRUE(context.InitCuda(&error)) << error;
  140. options.dense_linear_algebra_library_type = ceres::CUDA;
  141. std::unique_ptr<DenseCholesky> dense_cholesky =
  142. CUDADenseCholesky::Create(options);
  143. const int kNumTrials = 20;
  144. for (int i = 0; i < kNumTrials; ++i) {
  145. LhsType lhs = LhsType::Random(kNumCols, kNumCols);
  146. lhs = lhs.transpose() * lhs;
  147. lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
  148. SolutionType x_expected = SolutionType::Random(kNumCols);
  149. RhsType rhs = lhs * x_expected;
  150. SolutionType x_computed = SolutionType::Zero(kNumCols);
  151. // Sanity check the random matrix sizes.
  152. EXPECT_EQ(lhs.rows(), kNumCols);
  153. EXPECT_EQ(lhs.cols(), kNumCols);
  154. EXPECT_EQ(rhs.rows(), kNumCols);
  155. EXPECT_EQ(rhs.cols(), 1);
  156. EXPECT_EQ(x_expected.rows(), kNumCols);
  157. EXPECT_EQ(x_expected.cols(), 1);
  158. EXPECT_EQ(x_computed.rows(), kNumCols);
  159. EXPECT_EQ(x_computed.cols(), 1);
  160. LinearSolver::Summary summary;
  161. summary.termination_type = dense_cholesky->FactorAndSolve(
  162. kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
  163. ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
  164. static const double kEpsilon = std::numeric_limits<double>::epsilon() * 3e5;
  165. ASSERT_NEAR(
  166. (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
  167. }
  168. }
  169. TEST(CUDADenseCholeskyMixedPrecision, InvalidOptionsOnCreate) {
  170. {
  171. // Did not ask for CUDA, and did not ask for mixed precision.
  172. LinearSolver::Options options;
  173. ContextImpl context;
  174. options.context = &context;
  175. std::string error;
  176. EXPECT_TRUE(context.InitCuda(&error)) << error;
  177. auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
  178. ASSERT_EQ(solver, nullptr);
  179. }
  180. {
  181. // Asked for CUDA, but did not ask for mixed precision.
  182. LinearSolver::Options options;
  183. ContextImpl context;
  184. options.context = &context;
  185. std::string error;
  186. EXPECT_TRUE(context.InitCuda(&error)) << error;
  187. options.dense_linear_algebra_library_type = ceres::CUDA;
  188. auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
  189. ASSERT_EQ(solver, nullptr);
  190. }
  191. }
  192. // Tests the CUDA Cholesky solver with a simple 4x4 matrix.
  193. TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix1Step) {
  194. Eigen::Matrix4d A;
  195. // clang-format off
  196. // A common test Cholesky decomposition test matrix, see :
  197. // https://en.wikipedia.org/w/index.php?title=Cholesky_decomposition&oldid=1080607368#Example
  198. A << 4, 12, -16, 0,
  199. 12, 37, -43, 0,
  200. -16, -43, 98, 0,
  201. 0, 0, 0, 1;
  202. // clang-format on
  203. const Eigen::Vector4d b = Eigen::Vector4d::Ones();
  204. LinearSolver::Options options;
  205. options.max_num_refinement_iterations = 0;
  206. ContextImpl context;
  207. options.context = &context;
  208. std::string error;
  209. EXPECT_TRUE(context.InitCuda(&error)) << error;
  210. options.dense_linear_algebra_library_type = CUDA;
  211. options.use_mixed_precision_solves = true;
  212. auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
  213. ASSERT_NE(solver, nullptr);
  214. std::string error_string;
  215. ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
  216. LinearSolverTerminationType::SUCCESS);
  217. Eigen::Vector4d x = Eigen::Vector4d::Zero();
  218. ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
  219. LinearSolverTerminationType::SUCCESS);
  220. // A single step of the mixed precision solver will be equivalent to solving
  221. // in low precision (FP32). Hence the tolerance is defined w.r.t. FP32 epsilon
  222. // instead of FP64 epsilon.
  223. static const double kEpsilon = std::numeric_limits<float>::epsilon() * 10;
  224. const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
  225. EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
  226. EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
  227. EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
  228. EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
  229. }
  230. // Tests the CUDA Cholesky solver with a simple 4x4 matrix.
  231. TEST(CUDADenseCholeskyMixedPrecision, Cholesky4x4Matrix4Steps) {
  232. Eigen::Matrix4d A;
  233. // clang-format off
  234. A << 4, 12, -16, 0,
  235. 12, 37, -43, 0,
  236. -16, -43, 98, 0,
  237. 0, 0, 0, 1;
  238. // clang-format on
  239. const Eigen::Vector4d b = Eigen::Vector4d::Ones();
  240. LinearSolver::Options options;
  241. options.max_num_refinement_iterations = 3;
  242. ContextImpl context;
  243. options.context = &context;
  244. std::string error;
  245. EXPECT_TRUE(context.InitCuda(&error)) << error;
  246. options.dense_linear_algebra_library_type = CUDA;
  247. options.use_mixed_precision_solves = true;
  248. auto solver = CUDADenseCholeskyMixedPrecision::Create(options);
  249. ASSERT_NE(solver, nullptr);
  250. std::string error_string;
  251. ASSERT_EQ(solver->Factorize(A.cols(), A.data(), &error_string),
  252. LinearSolverTerminationType::SUCCESS);
  253. Eigen::Vector4d x = Eigen::Vector4d::Zero();
  254. ASSERT_EQ(solver->Solve(b.data(), x.data(), &error_string),
  255. LinearSolverTerminationType::SUCCESS);
  256. // The error does not reduce beyond four iterations, and stagnates at this
  257. // level of precision.
  258. static const double kEpsilon = std::numeric_limits<double>::epsilon() * 100;
  259. const Eigen::Vector4d x_expected(113.75 / 3.0, -31.0 / 3.0, 5.0 / 3.0, 1.0);
  260. EXPECT_NEAR((x[0] - x_expected[0]) / x_expected[0], 0.0, kEpsilon);
  261. EXPECT_NEAR((x[1] - x_expected[1]) / x_expected[1], 0.0, kEpsilon);
  262. EXPECT_NEAR((x[2] - x_expected[2]) / x_expected[2], 0.0, kEpsilon);
  263. EXPECT_NEAR((x[3] - x_expected[3]) / x_expected[3], 0.0, kEpsilon);
  264. }
  265. TEST(CUDADenseCholeskyMixedPrecision, Randomized1600x1600Tests) {
  266. const int kNumCols = 1600;
  267. using LhsType = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
  268. using RhsType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  269. using SolutionType = Eigen::Matrix<double, Eigen::Dynamic, 1>;
  270. LinearSolver::Options options;
  271. ContextImpl context;
  272. options.context = &context;
  273. std::string error;
  274. EXPECT_TRUE(context.InitCuda(&error)) << error;
  275. options.dense_linear_algebra_library_type = ceres::CUDA;
  276. options.use_mixed_precision_solves = true;
  277. options.max_num_refinement_iterations = 20;
  278. std::unique_ptr<CUDADenseCholeskyMixedPrecision> dense_cholesky =
  279. CUDADenseCholeskyMixedPrecision::Create(options);
  280. const int kNumTrials = 20;
  281. for (int i = 0; i < kNumTrials; ++i) {
  282. LhsType lhs = LhsType::Random(kNumCols, kNumCols);
  283. lhs = lhs.transpose() * lhs;
  284. lhs += 1e-3 * LhsType::Identity(kNumCols, kNumCols);
  285. SolutionType x_expected = SolutionType::Random(kNumCols);
  286. RhsType rhs = lhs * x_expected;
  287. SolutionType x_computed = SolutionType::Zero(kNumCols);
  288. // Sanity check the random matrix sizes.
  289. EXPECT_EQ(lhs.rows(), kNumCols);
  290. EXPECT_EQ(lhs.cols(), kNumCols);
  291. EXPECT_EQ(rhs.rows(), kNumCols);
  292. EXPECT_EQ(rhs.cols(), 1);
  293. EXPECT_EQ(x_expected.rows(), kNumCols);
  294. EXPECT_EQ(x_expected.cols(), 1);
  295. EXPECT_EQ(x_computed.rows(), kNumCols);
  296. EXPECT_EQ(x_computed.cols(), 1);
  297. LinearSolver::Summary summary;
  298. summary.termination_type = dense_cholesky->FactorAndSolve(
  299. kNumCols, lhs.data(), rhs.data(), x_computed.data(), &summary.message);
  300. ASSERT_EQ(summary.termination_type, LinearSolverTerminationType::SUCCESS);
  301. static const double kEpsilon = std::numeric_limits<double>::epsilon() * 1e6;
  302. ASSERT_NEAR(
  303. (x_computed - x_expected).norm() / x_expected.norm(), 0.0, kEpsilon);
  304. }
  305. }
  306. #endif // CERES_NO_CUDA
  307. } // namespace ceres::internal