// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: keir@google.com (Keir Mierle)

#include "ceres/block_jacobi_preconditioner.h"

#include <memory>
#include <mutex>
#include <utility>
#include <vector>

#include "Eigen/Dense"
#include "ceres/block_random_access_diagonal_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/casts.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/small_blas.h"

namespace ceres::internal {

BlockSparseJacobiPreconditioner::BlockSparseJacobiPreconditioner(
    Preconditioner::Options options, const BlockSparseMatrix& A)
    : options_(std::move(options)) {
  m_ = std::make_unique<BlockRandomAccessDiagonalMatrix>(
      A.block_structure()->cols, options_.context, options_.num_threads);
}

BlockSparseJacobiPreconditioner::~BlockSparseJacobiPreconditioner() = default;
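
// Computes m = block_diag(A' A + D' D)^{-1} one cell of A at a time: each
// cell b in row block i contributes b' b to the diagonal block of the column
// block it belongs to; D' D is then added to the diagonal and every block is
// inverted in place.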

bool BlockSparseJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A,
                                                 const double* D) {
  const CompressedRowBlockStructure* bs = A.block_structure();
  const double* values = A.values();
  m_->SetZero();

  ParallelFor(options_.context,
              0,
              bs->rows.size(),
              options_.num_threads,
              [this, bs, values](int i) {
                const int row_block_size = bs->rows[i].block.size;
                const std::vector<Cell>& cells = bs->rows[i].cells;
                for (const auto& cell : cells) {
                  const int block_id = cell.block_id;
                  const int col_block_size = bs->cols[block_id].size;
                  int r, c, row_stride, col_stride;
                  CellInfo* cell_info = m_->GetCell(
                      block_id, block_id, &r, &c, &row_stride, &col_stride);
                  // Multiple row blocks can contribute to the same diagonal
                  // block, so serialize updates when running multi-threaded.
                  auto lock =
                      MakeConditionalLock(options_.num_threads, cell_info->m);
                  // Accumulate b' b into the diagonal block, where b is this
                  // cell's row_block_size x col_block_size sub-matrix of A.
                  // clang-format off
                  MatrixTransposeMatrixMultiply<Eigen::Dynamic, Eigen::Dynamic,
                                                Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position, row_block_size, col_block_size,
                      values + cell.position, row_block_size, col_block_size,
                      cell_info->values, r, c, row_stride, col_stride);
                  // clang-format on
                }
              });

  if (D != nullptr) {
    // Add the contribution of the regularization diagonal, D' D, to the
    // block diagonal.
    ParallelFor(options_.context,
                0,
                bs->cols.size(),
                options_.num_threads,
                [this, bs, D](int i) {
                  const int block_size = bs->cols[i].size;
                  int r, c, row_stride, col_stride;
                  CellInfo* cell_info =
                      m_->GetCell(i, i, &r, &c, &row_stride, &col_stride);
                  MatrixRef m(cell_info->values, row_stride, col_stride);
                  m.block(r, c, block_size, block_size).diagonal() +=
                      ConstVectorRef(D + bs->cols[i].position, block_size)
                          .array()
                          .square()
                          .matrix();
                });
  }

  m_->Invert();
  return true;
}

BlockCRSJacobiPreconditioner::BlockCRSJacobiPreconditioner(
    Preconditioner::Options options, const CompressedRowSparseMatrix& A)
    : options_(std::move(options)), locks_(A.num_cols()) {
  auto& col_blocks = A.col_blocks();

  // Compute the number of non-zeros in the preconditioner. This is needed so
  // that we can construct the CompressedRowSparseMatrix.
  const int m_nnz = SumSquaredSizes(col_blocks);
  m_ = std::make_unique<CompressedRowSparseMatrix>(
      A.num_cols(), A.num_cols(), m_nnz);

  const int num_col_blocks = col_blocks.size();

  // Populate the sparsity structure of the preconditioner matrix.
  int* m_cols = m_->mutable_cols();
  int* m_rows = m_->mutable_rows();
  m_rows[0] = 0;
  for (int i = 0, idx = 0; i < num_col_blocks; ++i) {
    // For each column block populate a diagonal block in the preconditioner.
    // Note that because of the way the CompressedRowSparseMatrix format
    // works, the entire diagonal block is laid out contiguously in memory as
    // a row-major matrix. We will use this when updating the block.
    auto& block = col_blocks[i];
    for (int j = 0; j < block.size; ++j) {
      for (int k = 0; k < block.size; ++k, ++idx) {
        m_cols[idx] = block.position + k;
      }
      m_rows[block.position + j + 1] = idx;
    }
  }
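
  // For example, two column blocks of sizes 2 and 1 give
  // m_nnz = 2^2 + 1^2 = 5 and
  //
  //   m_rows = [0, 2, 4, 5]
  //   m_cols = [0, 1, 0, 1, 2]
  //
  // so the 2x2 diagonal block occupies values[0..3] as a row-major matrix and
  // the 1x1 block occupies values[4].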

  // In reality we only need num_col_blocks locks, but that would require being
  // able to look up the column block from its first column in UpdateImpl. To
  // save ourselves that lookup, locks_ holds one lock per column instead (a
  // few extra lock objects) and is indexed by the first column of each block.
  CHECK_EQ(m_rows[A.num_cols()], m_nnz);
}

BlockCRSJacobiPreconditioner::~BlockCRSJacobiPreconditioner() = default;
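
// Computes m = block_diag(A' A + D' D)^{-1} directly in the CRS storage of
// m_: each diagonal block is contiguous and row-major in memory, so it can be
// wrapped in a MatrixRef and updated in place.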

bool BlockCRSJacobiPreconditioner::UpdateImpl(
    const CompressedRowSparseMatrix& A, const double* D) {
  const auto& col_blocks = A.col_blocks();
  const auto& row_blocks = A.row_blocks();
  const int num_col_blocks = col_blocks.size();
  const int num_row_blocks = row_blocks.size();

  const int* a_rows = A.rows();
  const int* a_cols = A.cols();
  const double* a_values = A.values();
  double* m_values = m_->mutable_values();
  const int* m_rows = m_->rows();

  m_->SetZero();

  // Accumulate b' b for every (row block, column block) pair of A into the
  // corresponding diagonal block of the preconditioner.
  ParallelFor(
      options_.context,
      0,
      num_row_blocks,
      options_.num_threads,
      [this, &row_blocks, a_rows, a_cols, a_values, m_values, m_rows](int i) {
        const int row = row_blocks[i].position;
        const int row_block_size = row_blocks[i].size;
        const int row_nnz = a_rows[row + 1] - a_rows[row];
        ConstMatrixRef row_block(
            a_values + a_rows[row], row_block_size, row_nnz);
        int c = 0;
        while (c < row_nnz) {
          const int idx = a_rows[row] + c;
          const int col = a_cols[idx];
          const int col_block_size = m_rows[col + 1] - m_rows[col];

          // We make use of the fact that the entire diagonal block is
          // stored contiguously in memory as a row-major matrix.
          MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);
          // We do not have a row-stride version of
          // MatrixTransposeMatrixMultiply; otherwise we could use it here to
          // speed up the following expression further.
          auto b = row_block.middleCols(c, col_block_size);
          auto lock = MakeConditionalLock(options_.num_threads, locks_[col]);
          m.noalias() += b.transpose() * b;
          c += col_block_size;
        }
      });

  // Add the regularization diagonal (if any) and invert each diagonal block
  // in place.
  ParallelFor(
      options_.context,
      0,
      num_col_blocks,
      options_.num_threads,
      [&col_blocks, m_rows, m_values, D](int i) {
        const int col = col_blocks[i].position;
        const int col_block_size = col_blocks[i].size;
        MatrixRef m(m_values + m_rows[col], col_block_size, col_block_size);

        if (D != nullptr) {
          m.diagonal() +=
              ConstVectorRef(D + col, col_block_size).array().square().matrix();
        }

        // TODO(sameeragarwal): Deal with Cholesky inversion failure here and
        // elsewhere.
        m = m.llt().solve(Matrix::Identity(col_block_size, col_block_size));
      });

  return true;
}
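
// A minimal usage sketch (illustrative only; it assumes the Update() and
// RightMultiplyAndAccumulate() entry points of the Preconditioner interface,
// and that x, y, A, D, and context are supplied by the caller):
//
//   Preconditioner::Options options;
//   options.context = &context;
//   options.num_threads = 4;
//   BlockCRSJacobiPreconditioner preconditioner(options, A);
//   preconditioner.Update(A, D);
//   // y += block_diag(A' A + D' D)^{-1} x
//   preconditioner.RightMultiplyAndAccumulate(x.data(), y.data());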

}  // namespace ceres::internal