schur_complement_solver.cc 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408
  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: sameeragarwal@google.com (Sameer Agarwal)
  30. #include "ceres/schur_complement_solver.h"
  31. #include <algorithm>
  32. #include <ctime>
  33. #include <memory>
  34. #include <set>
  35. #include <utility>
  36. #include <vector>
  37. #include "Eigen/Dense"
  38. #include "Eigen/SparseCore"
  39. #include "ceres/block_random_access_dense_matrix.h"
  40. #include "ceres/block_random_access_matrix.h"
  41. #include "ceres/block_random_access_sparse_matrix.h"
  42. #include "ceres/block_sparse_matrix.h"
  43. #include "ceres/block_structure.h"
  44. #include "ceres/conjugate_gradients_solver.h"
  45. #include "ceres/detect_structure.h"
  46. #include "ceres/internal/eigen.h"
  47. #include "ceres/linear_solver.h"
  48. #include "ceres/sparse_cholesky.h"
  49. #include "ceres/triplet_sparse_matrix.h"
  50. #include "ceres/types.h"
  51. #include "ceres/wall_time.h"
  52. namespace ceres::internal {
  53. namespace {
  54. class BlockRandomAccessSparseMatrixAdapter final
  55. : public ConjugateGradientsLinearOperator<Vector> {
  56. public:
  57. explicit BlockRandomAccessSparseMatrixAdapter(
  58. const BlockRandomAccessSparseMatrix& m)
  59. : m_(m) {}
  60. void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
  61. m_.SymmetricRightMultiplyAndAccumulate(x.data(), y.data());
  62. }
  63. private:
  64. const BlockRandomAccessSparseMatrix& m_;
  65. };
  66. class BlockRandomAccessDiagonalMatrixAdapter final
  67. : public ConjugateGradientsLinearOperator<Vector> {
  68. public:
  69. explicit BlockRandomAccessDiagonalMatrixAdapter(
  70. const BlockRandomAccessDiagonalMatrix& m)
  71. : m_(m) {}
  72. // y = y + Ax;
  73. void RightMultiplyAndAccumulate(const Vector& x, Vector& y) final {
  74. m_.RightMultiplyAndAccumulate(x.data(), y.data());
  75. }
  76. private:
  77. const BlockRandomAccessDiagonalMatrix& m_;
  78. };
  79. } // namespace
  80. SchurComplementSolver::SchurComplementSolver(
  81. const LinearSolver::Options& options)
  82. : options_(options) {
  83. CHECK_GT(options.elimination_groups.size(), 1);
  84. CHECK_GT(options.elimination_groups[0], 0);
  85. CHECK(options.context != nullptr);
  86. }
  87. LinearSolver::Summary SchurComplementSolver::SolveImpl(
  88. BlockSparseMatrix* A,
  89. const double* b,
  90. const LinearSolver::PerSolveOptions& per_solve_options,
  91. double* x) {
  92. EventLogger event_logger("SchurComplementSolver::Solve");
  93. const CompressedRowBlockStructure* bs = A->block_structure();
  94. if (eliminator_ == nullptr) {
  95. const int num_eliminate_blocks = options_.elimination_groups[0];
  96. const int num_f_blocks = bs->cols.size() - num_eliminate_blocks;
  97. InitStorage(bs);
  98. DetectStructure(*bs,
  99. num_eliminate_blocks,
  100. &options_.row_block_size,
  101. &options_.e_block_size,
  102. &options_.f_block_size);
  103. // For the special case of the static structure <2,3,6> with
  104. // exactly one f block use the SchurEliminatorForOneFBlock.
  105. //
  106. // TODO(sameeragarwal): A more scalable template specialization
  107. // mechanism that does not cause binary bloat.
  108. if (options_.row_block_size == 2 && options_.e_block_size == 3 &&
  109. options_.f_block_size == 6 && num_f_blocks == 1) {
  110. eliminator_ = std::make_unique<SchurEliminatorForOneFBlock<2, 3, 6>>();
  111. } else {
  112. eliminator_ = SchurEliminatorBase::Create(options_);
  113. }
  114. CHECK(eliminator_);
  115. const bool kFullRankETE = true;
  116. eliminator_->Init(num_eliminate_blocks, kFullRankETE, bs);
  117. }
  118. std::fill(x, x + A->num_cols(), 0.0);
  119. event_logger.AddEvent("Setup");
  120. eliminator_->Eliminate(BlockSparseMatrixData(*A),
  121. b,
  122. per_solve_options.D,
  123. lhs_.get(),
  124. rhs_.data());
  125. event_logger.AddEvent("Eliminate");
  126. double* reduced_solution = x + A->num_cols() - lhs_->num_cols();
  127. const LinearSolver::Summary summary =
  128. SolveReducedLinearSystem(per_solve_options, reduced_solution);
  129. event_logger.AddEvent("ReducedSolve");
  130. if (summary.termination_type == LinearSolverTerminationType::SUCCESS) {
  131. eliminator_->BackSubstitute(
  132. BlockSparseMatrixData(*A), b, per_solve_options.D, reduced_solution, x);
  133. event_logger.AddEvent("BackSubstitute");
  134. }
  135. return summary;
  136. }
  137. DenseSchurComplementSolver::DenseSchurComplementSolver(
  138. const LinearSolver::Options& options)
  139. : SchurComplementSolver(options),
  140. cholesky_(DenseCholesky::Create(options)) {}
  141. DenseSchurComplementSolver::~DenseSchurComplementSolver() = default;
  142. // Initialize a BlockRandomAccessDenseMatrix to store the Schur
  143. // complement.
  144. void DenseSchurComplementSolver::InitStorage(
  145. const CompressedRowBlockStructure* bs) {
  146. const int num_eliminate_blocks = options().elimination_groups[0];
  147. const int num_col_blocks = bs->cols.size();
  148. auto blocks = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
  149. set_lhs(std::make_unique<BlockRandomAccessDenseMatrix>(
  150. blocks, options().context, options().num_threads));
  151. ResizeRhs(lhs()->num_rows());
  152. }
  153. // Solve the system Sx = r, assuming that the matrix S is stored in a
  154. // BlockRandomAccessDenseMatrix. The linear system is solved using
  155. // Eigen's Cholesky factorization.
  156. LinearSolver::Summary DenseSchurComplementSolver::SolveReducedLinearSystem(
  157. const LinearSolver::PerSolveOptions& /*per_solve_options*/,
  158. double* solution) {
  159. LinearSolver::Summary summary;
  160. summary.num_iterations = 0;
  161. summary.termination_type = LinearSolverTerminationType::SUCCESS;
  162. summary.message = "Success.";
  163. auto* m = down_cast<BlockRandomAccessDenseMatrix*>(mutable_lhs());
  164. const int num_rows = m->num_rows();
  165. // The case where there are no f blocks, and the system is block
  166. // diagonal.
  167. if (num_rows == 0) {
  168. return summary;
  169. }
  170. summary.num_iterations = 1;
  171. summary.termination_type = cholesky_->FactorAndSolve(
  172. num_rows, m->mutable_values(), rhs().data(), solution, &summary.message);
  173. return summary;
  174. }
  175. SparseSchurComplementSolver::SparseSchurComplementSolver(
  176. const LinearSolver::Options& options)
  177. : SchurComplementSolver(options) {
  178. if (options.type != ITERATIVE_SCHUR) {
  179. sparse_cholesky_ = SparseCholesky::Create(options);
  180. }
  181. }
  182. SparseSchurComplementSolver::~SparseSchurComplementSolver() {
  183. for (int i = 0; i < 4; ++i) {
  184. if (scratch_[i]) {
  185. delete scratch_[i];
  186. scratch_[i] = nullptr;
  187. }
  188. }
  189. }
  190. // Determine the non-zero blocks in the Schur Complement matrix, and
  191. // initialize a BlockRandomAccessSparseMatrix object.
  192. void SparseSchurComplementSolver::InitStorage(
  193. const CompressedRowBlockStructure* bs) {
  194. const int num_eliminate_blocks = options().elimination_groups[0];
  195. const int num_col_blocks = bs->cols.size();
  196. const int num_row_blocks = bs->rows.size();
  197. blocks_ = Tail(bs->cols, num_col_blocks - num_eliminate_blocks);
  198. std::set<std::pair<int, int>> block_pairs;
  199. for (int i = 0; i < blocks_.size(); ++i) {
  200. block_pairs.emplace(i, i);
  201. }
  202. int r = 0;
  203. while (r < num_row_blocks) {
  204. int e_block_id = bs->rows[r].cells.front().block_id;
  205. if (e_block_id >= num_eliminate_blocks) {
  206. break;
  207. }
  208. std::vector<int> f_blocks;
  209. // Add to the chunk until the first block in the row is
  210. // different than the one in the first row for the chunk.
  211. for (; r < num_row_blocks; ++r) {
  212. const CompressedRow& row = bs->rows[r];
  213. if (row.cells.front().block_id != e_block_id) {
  214. break;
  215. }
  216. // Iterate over the blocks in the row, ignoring the first
  217. // block since it is the one to be eliminated.
  218. for (int c = 1; c < row.cells.size(); ++c) {
  219. const Cell& cell = row.cells[c];
  220. f_blocks.push_back(cell.block_id - num_eliminate_blocks);
  221. }
  222. }
  223. sort(f_blocks.begin(), f_blocks.end());
  224. f_blocks.erase(unique(f_blocks.begin(), f_blocks.end()), f_blocks.end());
  225. for (int i = 0; i < f_blocks.size(); ++i) {
  226. for (int j = i + 1; j < f_blocks.size(); ++j) {
  227. block_pairs.emplace(f_blocks[i], f_blocks[j]);
  228. }
  229. }
  230. }
  231. // Remaining rows do not contribute to the chunks and directly go
  232. // into the schur complement via an outer product.
  233. for (; r < num_row_blocks; ++r) {
  234. const CompressedRow& row = bs->rows[r];
  235. CHECK_GE(row.cells.front().block_id, num_eliminate_blocks);
  236. for (int i = 0; i < row.cells.size(); ++i) {
  237. int r_block1_id = row.cells[i].block_id - num_eliminate_blocks;
  238. for (const auto& cell : row.cells) {
  239. int r_block2_id = cell.block_id - num_eliminate_blocks;
  240. if (r_block1_id <= r_block2_id) {
  241. block_pairs.emplace(r_block1_id, r_block2_id);
  242. }
  243. }
  244. }
  245. }
  246. set_lhs(std::make_unique<BlockRandomAccessSparseMatrix>(
  247. blocks_, block_pairs, options().context, options().num_threads));
  248. ResizeRhs(lhs()->num_rows());
  249. }
  250. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystem(
  251. const LinearSolver::PerSolveOptions& per_solve_options, double* solution) {
  252. if (options().type == ITERATIVE_SCHUR) {
  253. return SolveReducedLinearSystemUsingConjugateGradients(per_solve_options,
  254. solution);
  255. }
  256. LinearSolver::Summary summary;
  257. summary.num_iterations = 0;
  258. summary.termination_type = LinearSolverTerminationType::SUCCESS;
  259. summary.message = "Success.";
  260. const BlockSparseMatrix* bsm =
  261. down_cast<const BlockRandomAccessSparseMatrix*>(lhs())->matrix();
  262. if (bsm->num_rows() == 0) {
  263. return summary;
  264. }
  265. const CompressedRowSparseMatrix::StorageType storage_type =
  266. sparse_cholesky_->StorageType();
  267. if (storage_type ==
  268. CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR) {
  269. if (!crs_lhs_) {
  270. crs_lhs_ = bsm->ToCompressedRowSparseMatrix();
  271. crs_lhs_->set_storage_type(
  272. CompressedRowSparseMatrix::StorageType::UPPER_TRIANGULAR);
  273. } else {
  274. bsm->UpdateCompressedRowSparseMatrix(crs_lhs_.get());
  275. }
  276. } else {
  277. if (!crs_lhs_) {
  278. crs_lhs_ = bsm->ToCompressedRowSparseMatrixTranspose();
  279. crs_lhs_->set_storage_type(
  280. CompressedRowSparseMatrix::StorageType::LOWER_TRIANGULAR);
  281. } else {
  282. bsm->UpdateCompressedRowSparseMatrixTranspose(crs_lhs_.get());
  283. }
  284. }
  285. summary.num_iterations = 1;
  286. summary.termination_type = sparse_cholesky_->FactorAndSolve(
  287. crs_lhs_.get(), rhs().data(), solution, &summary.message);
  288. return summary;
  289. }
  290. LinearSolver::Summary
  291. SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients(
  292. const LinearSolver::PerSolveOptions& per_solve_options, double* solution) {
  293. CHECK(options().use_explicit_schur_complement);
  294. const int num_rows = lhs()->num_rows();
  295. // The case where there are no f blocks, and the system is block
  296. // diagonal.
  297. if (num_rows == 0) {
  298. LinearSolver::Summary summary;
  299. summary.num_iterations = 0;
  300. summary.termination_type = LinearSolverTerminationType::SUCCESS;
  301. summary.message = "Success.";
  302. return summary;
  303. }
  304. // Only SCHUR_JACOBI is supported over here right now.
  305. CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI);
  306. if (preconditioner_ == nullptr) {
  307. preconditioner_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
  308. blocks_, options().context, options().num_threads);
  309. }
  310. auto* sc = down_cast<BlockRandomAccessSparseMatrix*>(mutable_lhs());
  311. // Extract block diagonal from the Schur complement to construct the
  312. // schur_jacobi preconditioner.
  313. for (int i = 0; i < blocks_.size(); ++i) {
  314. const int block_size = blocks_[i].size;
  315. int sc_r, sc_c, sc_row_stride, sc_col_stride;
  316. CellInfo* sc_cell_info =
  317. sc->GetCell(i, i, &sc_r, &sc_c, &sc_row_stride, &sc_col_stride);
  318. CHECK(sc_cell_info != nullptr);
  319. MatrixRef sc_m(sc_cell_info->values, sc_row_stride, sc_col_stride);
  320. int pre_r, pre_c, pre_row_stride, pre_col_stride;
  321. CellInfo* pre_cell_info = preconditioner_->GetCell(
  322. i, i, &pre_r, &pre_c, &pre_row_stride, &pre_col_stride);
  323. CHECK(pre_cell_info != nullptr);
  324. MatrixRef pre_m(pre_cell_info->values, pre_row_stride, pre_col_stride);
  325. pre_m.block(pre_r, pre_c, block_size, block_size) =
  326. sc_m.block(sc_r, sc_c, block_size, block_size);
  327. }
  328. preconditioner_->Invert();
  329. VectorRef(solution, num_rows).setZero();
  330. auto lhs = std::make_unique<BlockRandomAccessSparseMatrixAdapter>(*sc);
  331. auto preconditioner =
  332. std::make_unique<BlockRandomAccessDiagonalMatrixAdapter>(
  333. *preconditioner_);
  334. ConjugateGradientsSolverOptions cg_options;
  335. cg_options.min_num_iterations = options().min_num_iterations;
  336. cg_options.max_num_iterations = options().max_num_iterations;
  337. cg_options.residual_reset_period = options().residual_reset_period;
  338. cg_options.q_tolerance = per_solve_options.q_tolerance;
  339. cg_options.r_tolerance = per_solve_options.r_tolerance;
  340. cg_solution_ = Vector::Zero(sc->num_rows());
  341. for (int i = 0; i < 4; ++i) {
  342. if (scratch_[i] == nullptr) {
  343. scratch_[i] = new Vector(sc->num_rows());
  344. }
  345. }
  346. auto summary = ConjugateGradientsSolver<Vector>(
  347. cg_options, *lhs, rhs(), *preconditioner, scratch_, cg_solution_);
  348. VectorRef(solution, sc->num_rows()) = cg_solution_;
  349. return summary;
  350. }
  351. } // namespace ceres::internal