// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)
- #include "ceres/block_sparse_matrix.h"
- #include <algorithm>
- #include <cstddef>
- #include <memory>
- #include <numeric>
- #include <random>
- #include <vector>
- #include "ceres/block_structure.h"
- #include "ceres/crs_matrix.h"
- #include "ceres/internal/eigen.h"
- #include "ceres/parallel_for.h"
- #include "ceres/parallel_vector_ops.h"
- #include "ceres/small_blas.h"
- #include "ceres/triplet_sparse_matrix.h"
- #include "glog/logging.h"
- #ifndef CERES_NO_CUDA
- #include "cuda_runtime.h"
- #endif
- namespace ceres::internal {
- namespace {
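
// Prefix-sums the per-row-block non-zero counts so that cumulative_nnz can
// serve as a per-iteration cost estimate for the guided ParallelFor loops
// further below.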
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
  if (rows.empty()) {
    return;
  }
  rows[0].cumulative_nnz = rows[0].nnz;
  for (int c = 1; c < rows.size(); ++c) {
    const int curr_nnz = rows[c].nnz;
    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
  }
}
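
// Builds the row and column index arrays of a CRS matrix with the given
// dimensions from a block structure; the values are filled in separately by
// UpdateCompressedRowSparseMatrixImpl. When transpose is true, the block
// structure is expected to be the transposed one produced by CreateTranspose.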
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
    const double* values,
    int num_rows,
    int num_cols,
    int num_nonzeros,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
      num_rows, num_cols, num_nonzeros);
  auto crs_cols = crs_matrix->mutable_cols();
  auto crs_rows = crs_matrix->mutable_rows();
  int value_offset = 0;
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  *crs_rows++ = 0;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    // An empty row block only requires setting the row offsets.
    if (row_block.cells.empty()) {
      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
      crs_rows += row_block.block.size;
      continue;
    }
    int row_nnz = 0;
    if constexpr (transpose) {
      // A transposed block structure comes with the nnz field of each row
      // block filled in.
      row_nnz = row_block.nnz / row_block.block.size;
    } else {
      // The nnz field of a non-transposed block structure is not filled in,
      // and its cells can be stored non-sequentially (consider the case of
      // the Jacobian for the Schur-complement solver: E and F blocks are
      // stored separately).
      for (auto& c : row_block.cells) {
        row_nnz += cols[c.block_id].size;
      }
    }
    // Row-wise setup of the matrix structure.
    for (int row = 0; row < row_block.block.size; ++row) {
      value_offset += row_nnz;
      *crs_rows++ = value_offset;
      for (auto& c : row_block.cells) {
        const int col_block_size = cols[c.block_id].size;
        const int col_position = cols[c.block_id].position;
        std::iota(crs_cols, crs_cols + col_block_size, col_position);
        crs_cols += col_block_size;
      }
    }
  }
  return crs_matrix;
}
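
// Copies the values of a block-sparse matrix into a CRS matrix whose
// structure was created by CreateStructureOfCompressedRowSparseMatrix,
// transposing each cell on copy when transpose is true.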
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
    CompressedRowSparseMatrix* crs_matrix,
    const double* values,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_values = crs_matrix->mutable_values();
  auto crs_rows = crs_matrix->mutable_rows();
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    const int row_block_size = row_block.block.size;
    const int row_nnz = crs_rows[1] - crs_rows[0];
    crs_rows += row_block_size;
    if (row_nnz == 0) {
      continue;
    }
    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
    int col_offset = 0;
    for (auto& c : row_block.cells) {
      const int col_block_size = cols[c.block_id].size;
      auto crs_cell =
          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
      if constexpr (transpose) {
        // The transposed matrix is filled using the transposed block
        // structure, so each cell is transposed on copy.
        ConstMatrixRef cell(
            values + c.position, col_block_size, row_block_size);
        crs_cell = cell.transpose();
      } else {
        ConstMatrixRef cell(
            values + c.position, row_block_size, col_block_size);
        crs_cell = cell;
      }
      col_offset += col_block_size;
    }
    crs_values += row_nnz * row_block_size;
  }
}
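
// Copies the row and column block partitioning of the block structure into
// the CRS matrix, so that consumers can recover the block layout.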
void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const int num_row_blocks = block_structure->rows.size();
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(num_row_blocks);
  for (int i = 0; i < num_row_blocks; ++i) {
    row_blocks[i] = block_structure->rows[i].block;
  }
  auto& col_blocks = *crs_matrix->mutable_col_blocks();
  col_blocks = block_structure->cols;
}

}  // namespace

BlockSparseMatrix::BlockSparseMatrix(
    CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
    : use_page_locked_memory_(use_page_locked_memory),
      num_rows_(0),
      num_cols_(0),
      num_nonzeros_(0),
      block_structure_(block_structure) {
  CHECK(block_structure_ != nullptr);

  // Count the number of columns in the matrix.
  for (auto& col : block_structure_->cols) {
    num_cols_ += col.size;
  }

  // Count the number of non-zero entries and the number of rows in
  // the matrix.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    num_rows_ += row_block_size;

    const std::vector<Cell>& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      num_nonzeros_ += col_block_size * row_block_size;
    }
  }

  CHECK_GE(num_rows_, 0);
  CHECK_GE(num_cols_, 0);
  CHECK_GE(num_nonzeros_, 0);
  VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
          << " bytes.";  // NOLINT
  values_ = AllocateValues(num_nonzeros_);
  max_num_nonzeros_ = num_nonzeros_;
  CHECK(values_ != nullptr);
  AddTransposeBlockStructure();
}

BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }

void BlockSparseMatrix::AddTransposeBlockStructure() {
  if (transpose_block_structure_ == nullptr) {
    transpose_block_structure_ = CreateTranspose(*block_structure_);
  }
}

void BlockSparseMatrix::SetZero() {
  std::fill(values_, values_ + num_nonzeros_, 0.0);
}

void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}
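
// Computes y += A * x. In the parallel overload each row block writes to a
// disjoint range of y, so row blocks can be processed independently without
// synchronization.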
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y) const {
  RightMultiplyAndAccumulate(x, y, nullptr, 1);
}

void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y,
                                                   ContextImpl* context,
                                                   int num_threads) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);

  const auto values = values_;
  const auto block_structure = block_structure_.get();
  const auto num_row_blocks = block_structure->rows.size();
  ParallelFor(context,
              0,
              num_row_blocks,
              num_threads,
              [values, block_structure, x, y](int row_block_id) {
                const int row_block_pos =
                    block_structure->rows[row_block_id].block.position;
                const int row_block_size =
                    block_structure->rows[row_block_id].block.size;
                const auto& cells = block_structure->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_size =
                      block_structure->cols[col_block_id].size;
                  const int col_block_pos =
                      block_structure->cols[col_block_id].position;
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position,
                      row_block_size,
                      col_block_size,
                      x + col_block_pos,
                      y + row_block_pos);
                }
              });
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y,
                                                  ContextImpl* context,
                                                  int num_threads) const {
  // While the transposed structure makes parallel left-multiplication by a
  // dense vector possible, it scatters the access pattern to the matrix
  // elements. Thus, multiplication via the transposed structure is only
  // worthwhile for parallel execution.
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    LeftMultiplyAndAccumulate(x, y);
    return;
  }

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  if (!num_col_blocks) {
    return;
  }

  // Use the non-zero count as the iteration cost for the guided parallel-for
  // loop.
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x, y](int row_block_id) {
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;
        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  // Single-threaded left products are always computed using the
  // non-transposed block structure, because it has a linear access pattern
  // to the matrix elements.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values_ + cell.position,
          row_block_size,
          col_block_size,
          x + row_block_pos,
          y + col_block_pos);
    }
  }
}
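
// Computes x[j] = sum_i A(i, j)^2, the squared Euclidean norm of each column,
// by accumulating the column-wise squared norms of the individual cells.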
void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
  CHECK(x != nullptr);
  VectorRef(x, num_cols_).setZero();
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      const MatrixRef m(
          values_ + cell.position, row_block_size, col_block_size);
      VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::SquaredColumnNorm(double* x,
                                          ContextImpl* context,
                                          int num_threads) const {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    SquaredColumnNorm(x);
    return;
  }

  CHECK(x != nullptr);
  ParallelSetZero(context, num_threads, x, num_cols_);

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];
        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          const MatrixRef m(values + cell.position, col.size, row.block.size);
          VectorRef(x + row.block.position, row.block.size) +=
              m.colwise().squaredNorm();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::ScaleColumns(const double* scale) {
  CHECK(scale != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
      m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::ScaleColumns(const double* scale,
                                     ContextImpl* context,
                                     int num_threads) {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    ScaleColumns(scale);
    return;
  }

  CHECK(scale != nullptr);
  auto transpose_bs = transpose_block_structure_.get();
  auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, scale](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];
        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          MatrixRef m(values + cell.position, col.size, row.block.size);
          m *= ConstVectorRef(scale + row.block.position, row.block.size)
                   .asDiagonal();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
  auto bs = transpose_block_structure_.get();
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
      values(), num_cols_, num_rows_, num_nonzeros_, bs);
  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);
  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
  return crs_matrix;
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
      values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get());
  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
                                               block_structure_.get());
  UpdateCompressedRowSparseMatrix(crs_matrix.get());
  return crs_matrix;
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transpose_block_structure_.get());
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<false>(
      crs_matrix, values(), block_structure_.get());
}

void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
  CHECK(dense_matrix != nullptr);

  dense_matrix->resize(num_rows_, num_cols_);
  dense_matrix->setZero();
  Matrix& m = *dense_matrix;

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
          MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
    }
  }
}

void BlockSparseMatrix::ToTripletSparseMatrix(
    TripletSparseMatrix* matrix) const {
  CHECK(matrix != nullptr);

  matrix->Reserve(num_nonzeros_);
  matrix->Resize(num_rows_, num_cols_);
  matrix->SetZero();

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c, ++jac_pos) {
          matrix->mutable_rows()[jac_pos] = row_block_pos + r;
          matrix->mutable_cols()[jac_pos] = col_block_pos + c;
          matrix->mutable_values()[jac_pos] = values_[jac_pos];
        }
      }
    }
  }
  matrix->set_num_nonzeros(num_nonzeros_);
}

// Return a pointer to the block structure. We continue to hold
// ownership of the object though.
const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
  return block_structure_.get();
}

// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object though.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
  return transpose_block_structure_.get();
}

void BlockSparseMatrix::ToTextFile(FILE* file) const {
  CHECK(file != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    const int row_block_pos = block_structure_->rows[i].block.position;
    const int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = block_structure_->cols[col_block_id].size;
      const int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c) {
          fprintf(file,
                  "% 10d % 10d %17f\n",
                  row_block_pos + r,
                  col_block_pos + c,
                  values_[jac_pos++]);
        }
      }
    }
  }
}

std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix(
    const double* diagonal, const std::vector<Block>& column_blocks) {
  // Create the block structure for the diagonal matrix.
  auto* bs = new CompressedRowBlockStructure();
  bs->cols = column_blocks;
  int position = 0;
  bs->rows.resize(column_blocks.size(), CompressedRow(1));
  for (int i = 0; i < column_blocks.size(); ++i) {
    CompressedRow& row = bs->rows[i];
    row.block = column_blocks[i];
    Cell& cell = row.cells[0];
    cell.block_id = i;
    cell.position = position;
    position += row.block.size * row.block.size;
  }

  // Create the BlockSparseMatrix with the given block structure.
  auto matrix = std::make_unique<BlockSparseMatrix>(bs);
  matrix->SetZero();

  // Fill the values array of the block sparse matrix.
  double* values = matrix->mutable_values();
  for (const auto& column_block : column_blocks) {
    const int size = column_block.size;
    for (int j = 0; j < size; ++j) {
      // j * (size + 1) is a compact way of accessing the (j, j) entry.
      values[j * (size + 1)] = diagonal[j];
    }
    diagonal += size;
    values += size * size;
  }
  return matrix;
}
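
// Appends the rows of m below the existing rows, growing the values array if
// needed and keeping the transposed block structure (cells, nnz counts, and
// cumulative nnz) in sync.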
void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
  CHECK_EQ(m.num_cols(), num_cols());
  const CompressedRowBlockStructure* m_bs = m.block_structure();
  CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size());

  const int old_num_nonzeros = num_nonzeros_;
  const int old_num_row_blocks = block_structure_->rows.size();
  block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size());

  for (int i = 0; i < m_bs->rows.size(); ++i) {
    const CompressedRow& m_row = m_bs->rows[i];
    const int row_block_id = old_num_row_blocks + i;
    CompressedRow& row = block_structure_->rows[row_block_id];
    row.block.size = m_row.block.size;
    row.block.position = num_rows_;
    num_rows_ += m_row.block.size;
    row.cells.resize(m_row.cells.size());
    if (transpose_block_structure_) {
      transpose_block_structure_->cols.emplace_back(row.block);
    }
    for (int c = 0; c < m_row.cells.size(); ++c) {
      const int block_id = m_row.cells[c].block_id;
      row.cells[c].block_id = block_id;
      row.cells[c].position = num_nonzeros_;

      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
      if (transpose_block_structure_) {
        transpose_block_structure_->rows[block_id].cells.emplace_back(
            row_block_id, num_nonzeros_);
        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
      }
      num_nonzeros_ += cell_nnz;
    }
  }

  if (num_nonzeros_ > max_num_nonzeros_) {
    double* old_values = values_;
    values_ = AllocateValues(num_nonzeros_);
    std::copy_n(old_values, old_num_nonzeros, values_);
    max_num_nonzeros_ = num_nonzeros_;
    FreeValues(old_values);
  }

  std::copy(
      m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
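
// Drops the trailing delta_row_blocks row blocks. Cells referencing the
// deleted row blocks are popped from the transposed block structure, and its
// nnz bookkeeping is updated to match.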
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
  const int num_row_blocks = block_structure_->rows.size();
  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
  int delta_num_nonzeros = 0;
  int delta_num_rows = 0;
  const std::vector<Block>& column_blocks = block_structure_->cols;
  for (int i = 0; i < delta_row_blocks; ++i) {
    const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1];
    delta_num_rows += row.block.size;
    for (int c = 0; c < row.cells.size(); ++c) {
      const Cell& cell = row.cells[c];
      delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;

      if (transpose_block_structure_) {
        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
        while (!col_cells.empty() &&
               col_cells.back().block_id >= new_num_row_blocks) {
          const int del_block_id = col_cells.back().block_id;
          const int del_block_rows =
              block_structure_->rows[del_block_id].block.size;
          const int del_block_cols = column_blocks[cell.block_id].size;
          const int del_cell_nnz = del_block_rows * del_block_cols;
          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
          col_cells.pop_back();
        }
      }
    }
  }
  num_nonzeros_ -= delta_num_nonzeros;
  num_rows_ -= delta_num_rows;
  block_structure_->rows.resize(new_num_row_blocks);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  for (int i = 0; i < delta_row_blocks; ++i) {
    transpose_block_structure_->cols.pop_back();
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
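
// Creates a random block-sparse matrix for testing: block sizes are drawn
// uniformly from the configured ranges, each cell is present with probability
// options.block_density, and the values are i.i.d. standard normal samples.
// The outer loop retries until at least one cell has been generated.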
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
    const BlockSparseMatrix::RandomMatrixOptions& options,
    std::mt19937& prng,
    bool use_page_locked_memory) {
  CHECK_GT(options.num_row_blocks, 0);
  CHECK_GT(options.min_row_block_size, 0);
  CHECK_GT(options.max_row_block_size, 0);
  CHECK_LE(options.min_row_block_size, options.max_row_block_size);
  CHECK_GT(options.block_density, 0.0);
  CHECK_LE(options.block_density, 1.0);

  std::uniform_int_distribution<int> col_distribution(
      options.min_col_block_size, options.max_col_block_size);
  std::uniform_int_distribution<int> row_distribution(
      options.min_row_block_size, options.max_row_block_size);
  auto bs = std::make_unique<CompressedRowBlockStructure>();
  if (options.col_blocks.empty()) {
    CHECK_GT(options.num_col_blocks, 0);
    CHECK_GT(options.min_col_block_size, 0);
    CHECK_GT(options.max_col_block_size, 0);
    CHECK_LE(options.min_col_block_size, options.max_col_block_size);

    // Generate the col block structure.
    int col_block_position = 0;
    for (int i = 0; i < options.num_col_blocks; ++i) {
      const int col_block_size = col_distribution(prng);
      bs->cols.emplace_back(col_block_size, col_block_position);
      col_block_position += col_block_size;
    }
  } else {
    bs->cols = options.col_blocks;
  }

  bool matrix_has_blocks = false;
  std::uniform_real_distribution<double> uniform01(0.0, 1.0);
  while (!matrix_has_blocks) {
    VLOG(1) << "Clearing";
    bs->rows.clear();
    int row_block_position = 0;
    int value_position = 0;
    for (int r = 0; r < options.num_row_blocks; ++r) {
      const int row_block_size = row_distribution(prng);
      bs->rows.emplace_back();
      CompressedRow& row = bs->rows.back();
      row.block.size = row_block_size;
      row.block.position = row_block_position;
      row_block_position += row_block_size;
      for (int c = 0; c < bs->cols.size(); ++c) {
        if (uniform01(prng) > options.block_density) continue;

        row.cells.emplace_back();
        Cell& cell = row.cells.back();
        cell.block_id = c;
        cell.position = value_position;
        value_position += row_block_size * bs->cols[c].size;
        matrix_has_blocks = true;
      }
    }
  }

  auto matrix = std::make_unique<BlockSparseMatrix>(bs.release(),
                                                    use_page_locked_memory);
  double* values = matrix->mutable_values();
  std::normal_distribution<double> standard_normal_distribution;
  std::generate_n(
      values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
        return standard_normal_distribution(prng);
      });
  return matrix;
}
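
// Builds the block structure of the transposed matrix: every cell (i, j) of
// bs becomes a cell (j, i) that points at the same value block, and the
// per-row nnz and cumulative nnz counts are filled in for cost-based
// scheduling.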
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
    const CompressedRowBlockStructure& bs) {
  auto transpose = std::make_unique<CompressedRowBlockStructure>();

  transpose->rows.resize(bs.cols.size());
  for (int i = 0; i < bs.cols.size(); ++i) {
    transpose->rows[i].block = bs.cols[i];
    transpose->rows[i].nnz = 0;
  }

  transpose->cols.resize(bs.rows.size());
  for (int i = 0; i < bs.rows.size(); ++i) {
    auto& row = bs.rows[i];
    transpose->cols[i] = row.block;

    const int nrows = row.block.size;
    for (auto& cell : row.cells) {
      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
      const int ncols = transpose->rows[cell.block_id].block.size;
      transpose->rows[cell.block_id].nnz += nrows * ncols;
    }
  }
  ComputeCumulativeNumberOfNonZeros(transpose->rows);
  return transpose;
}
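
// When CUDA is enabled and page-locked memory was requested, the values array
// is allocated with cudaHostAlloc; pinned allocations allow faster,
// asynchronous host-device transfers than pageable memory.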
double* BlockSparseMatrix::AllocateValues(int size) {
  if (!use_page_locked_memory_) {
    return new double[size];
  }

#ifndef CERES_NO_CUDA
  double* values = nullptr;
  CHECK_EQ(
      cudaSuccess,
      cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
  return values;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
  return nullptr;
#endif
}

void BlockSparseMatrix::FreeValues(double*& values) {
  if (!use_page_locked_memory_) {
    delete[] values;
    values = nullptr;
    return;
  }

#ifndef CERES_NO_CUDA
  CHECK_EQ(cudaSuccess, cudaFreeHost(values));
  values = nullptr;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
#endif
}

}  // namespace ceres::internal