block_sparse_matrix.cc

// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2023 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: sameeragarwal@google.com (Sameer Agarwal)

#include "ceres/block_sparse_matrix.h"

#include <algorithm>
#include <cstddef>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include "ceres/block_structure.h"
#include "ceres/crs_matrix.h"
#include "ceres/internal/eigen.h"
#include "ceres/parallel_for.h"
#include "ceres/parallel_vector_ops.h"
#include "ceres/small_blas.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

#ifndef CERES_NO_CUDA
#include "cuda_runtime.h"
#endif

namespace ceres::internal {
namespace {
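// Fills the cumulative_nnz field of each row block: the i-th entry is the
// total number of non-zeros in row blocks 0..i. These prefix sums are used
// as per-iteration costs when load balancing the parallel loops over the
// transposed block structure below.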
void ComputeCumulativeNumberOfNonZeros(std::vector<CompressedList>& rows) {
  if (rows.empty()) {
    return;
  }
  rows[0].cumulative_nnz = rows[0].nnz;
  for (int c = 1; c < rows.size(); ++c) {
    const int curr_nnz = rows[c].nnz;
    rows[c].cumulative_nnz = curr_nnz + rows[c - 1].cumulative_nnz;
  }
}
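
// Creates a CompressedRowSparseMatrix of the given dimensions and fills in
// its rows and cols arrays from the block structure; the values array is
// left unset. With transpose == true the block structure is expected to be
// the transposed one, with the nnz field of every row block filled in.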
template <bool transpose>
std::unique_ptr<CompressedRowSparseMatrix>
CreateStructureOfCompressedRowSparseMatrix(
    const double* values,
    int num_rows,
    int num_cols,
    int num_nonzeros,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_matrix = std::make_unique<CompressedRowSparseMatrix>(
      num_rows, num_cols, num_nonzeros);
  auto crs_cols = crs_matrix->mutable_cols();
  auto crs_rows = crs_matrix->mutable_rows();
  int value_offset = 0;
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  *crs_rows++ = 0;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    // An empty row block only requires setting the row offsets.
    if (row_block.cells.empty()) {
      std::fill(crs_rows, crs_rows + row_block.block.size, value_offset);
      crs_rows += row_block.block.size;
      continue;
    }
    int row_nnz = 0;
    if constexpr (transpose) {
      // The transposed block structure comes with the nnz of each row block
      // filled in.
      row_nnz = row_block.nnz / row_block.block.size;
    } else {
      // The nnz field of a non-transposed block structure is not filled in,
      // and the cells can be laid out non-sequentially (consider the
      // Jacobian of the Schur-complement solver: the E and F blocks are
      // stored separately).
      for (auto& c : row_block.cells) {
        row_nnz += cols[c.block_id].size;
      }
    }
    // Row-wise setup of the matrix structure.
    for (int row = 0; row < row_block.block.size; ++row) {
      value_offset += row_nnz;
      *crs_rows++ = value_offset;
      for (auto& c : row_block.cells) {
        const int col_block_size = cols[c.block_id].size;
        const int col_position = cols[c.block_id].position;
        std::iota(crs_cols, crs_cols + col_block_size, col_position);
        crs_cols += col_block_size;
      }
    }
  }
  return crs_matrix;
}
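
// Copies the values of the block sparse matrix into the values array of a
// CRS matrix whose structure was produced by
// CreateStructureOfCompressedRowSparseMatrix. With transpose == true each
// cell is copied transposed, using the transposed block structure.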
template <bool transpose>
void UpdateCompressedRowSparseMatrixImpl(
    CompressedRowSparseMatrix* crs_matrix,
    const double* values,
    const CompressedRowBlockStructure* block_structure) {
  auto crs_values = crs_matrix->mutable_values();
  auto crs_rows = crs_matrix->mutable_rows();
  const int num_row_blocks = block_structure->rows.size();
  const auto& cols = block_structure->cols;
  for (int row_block_id = 0; row_block_id < num_row_blocks; ++row_block_id) {
    const auto& row_block = block_structure->rows[row_block_id];
    const int row_block_size = row_block.block.size;
    const int row_nnz = crs_rows[1] - crs_rows[0];
    crs_rows += row_block_size;
    if (row_nnz == 0) {
      continue;
    }
    MatrixRef crs_row_block(crs_values, row_block_size, row_nnz);
    int col_offset = 0;
    for (auto& c : row_block.cells) {
      const int col_block_size = cols[c.block_id].size;
      auto crs_cell =
          crs_row_block.block(0, col_offset, row_block_size, col_block_size);
      if constexpr (transpose) {
        // The transposed matrix is filled using the transposed
        // block structure.
        ConstMatrixRef cell(
            values + c.position, col_block_size, row_block_size);
        crs_cell = cell.transpose();
      } else {
        ConstMatrixRef cell(
            values + c.position, row_block_size, col_block_size);
        crs_cell = cell;
      }
      col_offset += col_block_size;
    }
    crs_values += row_nnz * row_block_size;
  }
}
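
// Copies the row and column block sizes of the block structure into the
// row_blocks and col_blocks arrays of the CRS matrix.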
void SetBlockStructureOfCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix,
    CompressedRowBlockStructure* block_structure) {
  const int num_row_blocks = block_structure->rows.size();
  auto& row_blocks = *crs_matrix->mutable_row_blocks();
  row_blocks.resize(num_row_blocks);
  for (int i = 0; i < num_row_blocks; ++i) {
    row_blocks[i] = block_structure->rows[i].block;
  }

  auto& col_blocks = *crs_matrix->mutable_col_blocks();
  col_blocks = block_structure->cols;
}
}  // namespace

BlockSparseMatrix::BlockSparseMatrix(
    CompressedRowBlockStructure* block_structure, bool use_page_locked_memory)
    : use_page_locked_memory_(use_page_locked_memory),
      num_rows_(0),
      num_cols_(0),
      num_nonzeros_(0),
      block_structure_(block_structure) {
  CHECK(block_structure_ != nullptr);

  // Count the number of columns in the matrix.
  for (auto& col : block_structure_->cols) {
    num_cols_ += col.size;
  }

  // Count the number of non-zero entries and the number of rows in
  // the matrix.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    num_rows_ += row_block_size;

    const std::vector<Cell>& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      num_nonzeros_ += col_block_size * row_block_size;
    }
  }

  CHECK_GE(num_rows_, 0);
  CHECK_GE(num_cols_, 0);
  CHECK_GE(num_nonzeros_, 0);
  VLOG(2) << "Allocating values array with " << num_nonzeros_ * sizeof(double)
          << " bytes.";  // NOLINT
  values_ = AllocateValues(num_nonzeros_);
  max_num_nonzeros_ = num_nonzeros_;
  CHECK(values_ != nullptr);
  AddTransposeBlockStructure();
}

BlockSparseMatrix::~BlockSparseMatrix() { FreeValues(values_); }
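
// Creates the transposed block structure if it has not been created yet.
// The transpose is used by the multi-threaded column-oriented operations
// below (left products, squared column norms and column scaling).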
void BlockSparseMatrix::AddTransposeBlockStructure() {
  if (transpose_block_structure_ == nullptr) {
    transpose_block_structure_ = CreateTranspose(*block_structure_);
  }
}

void BlockSparseMatrix::SetZero() {
  std::fill(values_, values_ + num_nonzeros_, 0.0);
}

void BlockSparseMatrix::SetZero(ContextImpl* context, int num_threads) {
  ParallelSetZero(context, num_threads, values_, num_nonzeros_);
}

void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y) const {
  RightMultiplyAndAccumulate(x, y, nullptr, 1);
}
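
// Computes y += A x. Row blocks are processed in parallel; this is safe
// because each row block writes to a disjoint segment of y.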
void BlockSparseMatrix::RightMultiplyAndAccumulate(const double* x,
                                                   double* y,
                                                   ContextImpl* context,
                                                   int num_threads) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);

  const auto values = values_;
  const auto block_structure = block_structure_.get();
  const auto num_row_blocks = block_structure->rows.size();
  ParallelFor(context,
              0,
              num_row_blocks,
              num_threads,
              [values, block_structure, x, y](int row_block_id) {
                const int row_block_pos =
                    block_structure->rows[row_block_id].block.position;
                const int row_block_size =
                    block_structure->rows[row_block_id].block.size;
                const auto& cells = block_structure->rows[row_block_id].cells;
                for (const auto& cell : cells) {
                  const int col_block_id = cell.block_id;
                  const int col_block_size =
                      block_structure->cols[col_block_id].size;
                  const int col_block_pos =
                      block_structure->cols[col_block_id].position;
                  MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
                      values + cell.position,
                      row_block_size,
                      col_block_size,
                      x + col_block_pos,
                      y + row_block_pos);
                }
              });
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y,
                                                  ContextImpl* context,
                                                  int num_threads) const {
  // While the transposed structure makes it possible to compute the left
  // product by a dense vector in parallel, it also scatters the accesses to
  // the matrix elements. Thus, multiplication via the transposed structure
  // is only worthwhile when executing in parallel.
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    LeftMultiplyAndAccumulate(x, y);
    return;
  }

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  if (!num_col_blocks) {
    return;
  }

  // Use the non-zero count as the iteration cost for the guided
  // parallel-for loop.
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x, y](int row_block_id) {
        int row_block_pos = transpose_bs->rows[row_block_id].block.position;
        int row_block_size = transpose_bs->rows[row_block_id].block.size;
        auto& cells = transpose_bs->rows[row_block_id].cells;
        for (auto& cell : cells) {
          const int col_block_id = cell.block_id;
          const int col_block_size = transpose_bs->cols[col_block_id].size;
          const int col_block_pos = transpose_bs->cols[col_block_id].position;
          MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
              values + cell.position,
              col_block_size,
              row_block_size,
              x + col_block_pos,
              y + row_block_pos);
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

void BlockSparseMatrix::LeftMultiplyAndAccumulate(const double* x,
                                                  double* y) const {
  CHECK(x != nullptr);
  CHECK(y != nullptr);
  // Single-threaded left products are always computed using the
  // non-transposed block structure, because it has a linear access pattern
  // to the matrix elements.
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
          values_ + cell.position,
          row_block_size,
          col_block_size,
          x + row_block_pos,
          y + col_block_pos);
    }
  }
}
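
// Computes the squared L2 norm of every column of the matrix and stores it
// in x, i.e. x[c] = sum over rows r of A(r, c)^2.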
void BlockSparseMatrix::SquaredColumnNorm(double* x) const {
  CHECK(x != nullptr);
  VectorRef(x, num_cols_).setZero();
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      const MatrixRef m(
          values_ + cell.position, row_block_size, col_block_size);
      VectorRef(x + col_block_pos, col_block_size) += m.colwise().squaredNorm();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::SquaredColumnNorm(double* x,
                                          ContextImpl* context,
                                          int num_threads) const {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    SquaredColumnNorm(x);
    return;
  }

  CHECK(x != nullptr);
  ParallelSetZero(context, num_threads, x, num_cols_);

  auto transpose_bs = transpose_block_structure_.get();
  const auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, x](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];
        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          const MatrixRef m(values + cell.position, col.size, row.block.size);
          VectorRef(x + row.block.position, row.block.size) +=
              m.colwise().squaredNorm();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}
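
// Scales each column of the matrix by the corresponding entry of scale.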
void BlockSparseMatrix::ScaleColumns(const double* scale) {
  CHECK(scale != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      MatrixRef m(values_ + cell.position, row_block_size, col_block_size);
      m *= ConstVectorRef(scale + col_block_pos, col_block_size).asDiagonal();
    }
  }
}

// TODO(https://github.com/ceres-solver/ceres-solver/issues/933): This method
// might benefit from caching the column-block partition.
void BlockSparseMatrix::ScaleColumns(const double* scale,
                                     ContextImpl* context,
                                     int num_threads) {
  if (transpose_block_structure_ == nullptr || num_threads == 1) {
    ScaleColumns(scale);
    return;
  }

  CHECK(scale != nullptr);
  auto transpose_bs = transpose_block_structure_.get();
  auto values = values_;
  const int num_col_blocks = transpose_bs->rows.size();
  ParallelFor(
      context,
      0,
      num_col_blocks,
      num_threads,
      [values, transpose_bs, scale](int row_block_id) {
        const auto& row = transpose_bs->rows[row_block_id];
        for (auto& cell : row.cells) {
          const auto& col = transpose_bs->cols[cell.block_id];
          MatrixRef m(values + cell.position, col.size, row.block.size);
          m *= ConstVectorRef(scale + row.block.position, row.block.size)
                   .asDiagonal();
        }
      },
      transpose_bs->rows.data(),
      [](const CompressedRow& row) { return row.cumulative_nnz; });
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrixTranspose() const {
  auto bs = transpose_block_structure_.get();
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<true>(
      values(), num_cols_, num_rows_, num_nonzeros_, bs);
  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(), bs);
  UpdateCompressedRowSparseMatrixTranspose(crs_matrix.get());
  return crs_matrix;
}

std::unique_ptr<CompressedRowSparseMatrix>
BlockSparseMatrix::ToCompressedRowSparseMatrix() const {
  auto crs_matrix = CreateStructureOfCompressedRowSparseMatrix<false>(
      values(), num_rows_, num_cols_, num_nonzeros_, block_structure_.get());
  SetBlockStructureOfCompressedRowSparseMatrix(crs_matrix.get(),
                                               block_structure_.get());
  UpdateCompressedRowSparseMatrix(crs_matrix.get());
  return crs_matrix;
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrixTranspose(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_cols_);
  CHECK_EQ(crs_matrix->num_cols(), num_rows_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<true>(
      crs_matrix, values(), transpose_block_structure_.get());
}

void BlockSparseMatrix::UpdateCompressedRowSparseMatrix(
    CompressedRowSparseMatrix* crs_matrix) const {
  CHECK(crs_matrix != nullptr);
  CHECK_EQ(crs_matrix->num_rows(), num_rows_);
  CHECK_EQ(crs_matrix->num_cols(), num_cols_);
  CHECK_EQ(crs_matrix->num_nonzeros(), num_nonzeros_);
  UpdateCompressedRowSparseMatrixImpl<false>(
      crs_matrix, values(), block_structure_.get());
}

void BlockSparseMatrix::ToDenseMatrix(Matrix* dense_matrix) const {
  CHECK(dense_matrix != nullptr);
  dense_matrix->resize(num_rows_, num_cols_);
  dense_matrix->setZero();
  Matrix& m = *dense_matrix;

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      m.block(row_block_pos, col_block_pos, row_block_size, col_block_size) +=
          MatrixRef(values_ + jac_pos, row_block_size, col_block_size);
    }
  }
}

void BlockSparseMatrix::ToTripletSparseMatrix(
    TripletSparseMatrix* matrix) const {
  CHECK(matrix != nullptr);
  matrix->Reserve(num_nonzeros_);
  matrix->Resize(num_rows_, num_cols_);
  matrix->SetZero();

  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    int row_block_pos = block_structure_->rows[i].block.position;
    int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      int col_block_id = cell.block_id;
      int col_block_size = block_structure_->cols[col_block_id].size;
      int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c, ++jac_pos) {
          matrix->mutable_rows()[jac_pos] = row_block_pos + r;
          matrix->mutable_cols()[jac_pos] = col_block_pos + c;
          matrix->mutable_values()[jac_pos] = values_[jac_pos];
        }
      }
    }
  }
  matrix->set_num_nonzeros(num_nonzeros_);
}

// Return a pointer to the block structure. We continue to hold
// ownership of the object though.
const CompressedRowBlockStructure* BlockSparseMatrix::block_structure() const {
  return block_structure_.get();
}

// Return a pointer to the block structure of the matrix transpose. We
// continue to hold ownership of the object though.
const CompressedRowBlockStructure*
BlockSparseMatrix::transpose_block_structure() const {
  return transpose_block_structure_.get();
}

void BlockSparseMatrix::ToTextFile(FILE* file) const {
  CHECK(file != nullptr);
  for (int i = 0; i < block_structure_->rows.size(); ++i) {
    const int row_block_pos = block_structure_->rows[i].block.position;
    const int row_block_size = block_structure_->rows[i].block.size;
    const auto& cells = block_structure_->rows[i].cells;
    for (const auto& cell : cells) {
      const int col_block_id = cell.block_id;
      const int col_block_size = block_structure_->cols[col_block_id].size;
      const int col_block_pos = block_structure_->cols[col_block_id].position;
      int jac_pos = cell.position;
      for (int r = 0; r < row_block_size; ++r) {
        for (int c = 0; c < col_block_size; ++c) {
          fprintf(file,
                  "% 10d % 10d %17f\n",
                  row_block_pos + r,
                  col_block_pos + c,
                  values_[jac_pos++]);
        }
      }
    }
  }
}

std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateDiagonalMatrix(
    const double* diagonal, const std::vector<Block>& column_blocks) {
  // Create the block structure for the diagonal matrix.
  auto* bs = new CompressedRowBlockStructure();
  bs->cols = column_blocks;
  int position = 0;
  bs->rows.resize(column_blocks.size(), CompressedRow(1));
  for (int i = 0; i < column_blocks.size(); ++i) {
    CompressedRow& row = bs->rows[i];
    row.block = column_blocks[i];
    Cell& cell = row.cells[0];
    cell.block_id = i;
    cell.position = position;
    position += row.block.size * row.block.size;
  }

  // Create the BlockSparseMatrix with the given block structure.
  auto matrix = std::make_unique<BlockSparseMatrix>(bs);
  matrix->SetZero();

  // Fill the values array of the block sparse matrix.
  double* values = matrix->mutable_values();
  for (const auto& column_block : column_blocks) {
    const int size = column_block.size;
    for (int j = 0; j < size; ++j) {
      // j * (size + 1) is a compact way of indexing the (j, j) entry of a
      // size x size row-major block.
      values[j * (size + 1)] = diagonal[j];
    }
    diagonal += size;
    values += size * size;
  }
  return matrix;
}
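
// Appends the row blocks of m below the existing rows of this matrix. The
// two matrices must have identical column block structure. The values array
// is reallocated if the combined number of non-zeros exceeds the current
// capacity, and the transposed block structure (if present) is updated
// incrementally.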
void BlockSparseMatrix::AppendRows(const BlockSparseMatrix& m) {
  CHECK_EQ(m.num_cols(), num_cols());
  const CompressedRowBlockStructure* m_bs = m.block_structure();
  CHECK_EQ(m_bs->cols.size(), block_structure_->cols.size());

  const int old_num_nonzeros = num_nonzeros_;
  const int old_num_row_blocks = block_structure_->rows.size();
  block_structure_->rows.resize(old_num_row_blocks + m_bs->rows.size());

  for (int i = 0; i < m_bs->rows.size(); ++i) {
    const CompressedRow& m_row = m_bs->rows[i];
    const int row_block_id = old_num_row_blocks + i;
    CompressedRow& row = block_structure_->rows[row_block_id];
    row.block.size = m_row.block.size;
    row.block.position = num_rows_;
    num_rows_ += m_row.block.size;
    row.cells.resize(m_row.cells.size());
    if (transpose_block_structure_) {
      transpose_block_structure_->cols.emplace_back(row.block);
    }
    for (int c = 0; c < m_row.cells.size(); ++c) {
      const int block_id = m_row.cells[c].block_id;
      row.cells[c].block_id = block_id;
      row.cells[c].position = num_nonzeros_;

      const int cell_nnz = m_row.block.size * m_bs->cols[block_id].size;
      if (transpose_block_structure_) {
        transpose_block_structure_->rows[block_id].cells.emplace_back(
            row_block_id, num_nonzeros_);
        transpose_block_structure_->rows[block_id].nnz += cell_nnz;
      }
      num_nonzeros_ += cell_nnz;
    }
  }

  if (num_nonzeros_ > max_num_nonzeros_) {
    double* old_values = values_;
    values_ = AllocateValues(num_nonzeros_);
    std::copy_n(old_values, old_num_nonzeros, values_);
    max_num_nonzeros_ = num_nonzeros_;
    FreeValues(old_values);
  }

  std::copy(
      m.values(), m.values() + m.num_nonzeros(), values_ + old_num_nonzeros);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
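
// Removes the last delta_row_blocks row blocks from the matrix, shrinking
// the block structure and the non-zero count accordingly. Cells referring
// to the deleted row blocks are also removed from the transposed block
// structure, if present.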
void BlockSparseMatrix::DeleteRowBlocks(const int delta_row_blocks) {
  const int num_row_blocks = block_structure_->rows.size();
  const int new_num_row_blocks = num_row_blocks - delta_row_blocks;
  int delta_num_nonzeros = 0;
  int delta_num_rows = 0;
  const std::vector<Block>& column_blocks = block_structure_->cols;
  for (int i = 0; i < delta_row_blocks; ++i) {
    const CompressedRow& row = block_structure_->rows[num_row_blocks - i - 1];
    delta_num_rows += row.block.size;
    for (int c = 0; c < row.cells.size(); ++c) {
      const Cell& cell = row.cells[c];
      delta_num_nonzeros += row.block.size * column_blocks[cell.block_id].size;

      if (transpose_block_structure_) {
        auto& col_cells = transpose_block_structure_->rows[cell.block_id].cells;
        while (!col_cells.empty() &&
               col_cells.back().block_id >= new_num_row_blocks) {
          const int del_block_id = col_cells.back().block_id;
          const int del_block_rows =
              block_structure_->rows[del_block_id].block.size;
          const int del_block_cols = column_blocks[cell.block_id].size;
          const int del_cell_nnz = del_block_rows * del_block_cols;
          transpose_block_structure_->rows[cell.block_id].nnz -= del_cell_nnz;
          col_cells.pop_back();
        }
      }
    }
  }
  num_nonzeros_ -= delta_num_nonzeros;
  num_rows_ -= delta_num_rows;
  block_structure_->rows.resize(new_num_row_blocks);

  if (transpose_block_structure_ == nullptr) {
    return;
  }
  for (int i = 0; i < delta_row_blocks; ++i) {
    transpose_block_structure_->cols.pop_back();
  }
  ComputeCumulativeNumberOfNonZeros(transpose_block_structure_->rows);
}
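
// Creates a random matrix whose block structure is drawn from the given
// options: row and column block sizes are sampled uniformly from the given
// ranges, each cell is present with probability block_density, and the
// values are sampled from a standard normal distribution.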
std::unique_ptr<BlockSparseMatrix> BlockSparseMatrix::CreateRandomMatrix(
    const BlockSparseMatrix::RandomMatrixOptions& options,
    std::mt19937& prng,
    bool use_page_locked_memory) {
  CHECK_GT(options.num_row_blocks, 0);
  CHECK_GT(options.min_row_block_size, 0);
  CHECK_GT(options.max_row_block_size, 0);
  CHECK_LE(options.min_row_block_size, options.max_row_block_size);
  CHECK_GT(options.block_density, 0.0);
  CHECK_LE(options.block_density, 1.0);

  std::uniform_int_distribution<int> col_distribution(
      options.min_col_block_size, options.max_col_block_size);
  std::uniform_int_distribution<int> row_distribution(
      options.min_row_block_size, options.max_row_block_size);
  auto bs = std::make_unique<CompressedRowBlockStructure>();
  if (options.col_blocks.empty()) {
    CHECK_GT(options.num_col_blocks, 0);
    CHECK_GT(options.min_col_block_size, 0);
    CHECK_GT(options.max_col_block_size, 0);
    CHECK_LE(options.min_col_block_size, options.max_col_block_size);

    // Generate the col block structure.
    int col_block_position = 0;
    for (int i = 0; i < options.num_col_blocks; ++i) {
      const int col_block_size = col_distribution(prng);
      bs->cols.emplace_back(col_block_size, col_block_position);
      col_block_position += col_block_size;
    }
  } else {
    bs->cols = options.col_blocks;
  }

  bool matrix_has_blocks = false;
  std::uniform_real_distribution<double> uniform01(0.0, 1.0);
  while (!matrix_has_blocks) {
    VLOG(1) << "Clearing";
    bs->rows.clear();
    int row_block_position = 0;
    int value_position = 0;
    for (int r = 0; r < options.num_row_blocks; ++r) {
      const int row_block_size = row_distribution(prng);
      bs->rows.emplace_back();
      CompressedRow& row = bs->rows.back();
      row.block.size = row_block_size;
      row.block.position = row_block_position;
      row_block_position += row_block_size;
      for (int c = 0; c < bs->cols.size(); ++c) {
        if (uniform01(prng) > options.block_density) continue;
        row.cells.emplace_back();
        Cell& cell = row.cells.back();
        cell.block_id = c;
        cell.position = value_position;
        value_position += row_block_size * bs->cols[c].size;
        matrix_has_blocks = true;
      }
    }
  }

  auto matrix = std::make_unique<BlockSparseMatrix>(bs.release(),
                                                    use_page_locked_memory);
  double* values = matrix->mutable_values();
  std::normal_distribution<double> standard_normal_distribution;
  std::generate_n(
      values, matrix->num_nonzeros(), [&standard_normal_distribution, &prng] {
        return standard_normal_distribution(prng);
      });
  return matrix;
}
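
// Builds the block structure of the transpose of the matrix described by
// bs: row blocks become column blocks and vice versa, and each cell (i, j)
// is re-indexed as (j, i) while keeping its value offset. The per-row nnz
// and cumulative nnz counts of the transpose are filled in as well.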
std::unique_ptr<CompressedRowBlockStructure> CreateTranspose(
    const CompressedRowBlockStructure& bs) {
  auto transpose = std::make_unique<CompressedRowBlockStructure>();

  transpose->rows.resize(bs.cols.size());
  for (int i = 0; i < bs.cols.size(); ++i) {
    transpose->rows[i].block = bs.cols[i];
    transpose->rows[i].nnz = 0;
  }

  transpose->cols.resize(bs.rows.size());
  for (int i = 0; i < bs.rows.size(); ++i) {
    auto& row = bs.rows[i];
    transpose->cols[i] = row.block;

    const int nrows = row.block.size;
    for (auto& cell : row.cells) {
      transpose->rows[cell.block_id].cells.emplace_back(i, cell.position);
      const int ncols = transpose->rows[cell.block_id].block.size;
      transpose->rows[cell.block_id].nnz += nrows * ncols;
    }
  }
  ComputeCumulativeNumberOfNonZeros(transpose->rows);
  return transpose;
}
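
// Allocates the values array, either with operator new[] or, when page
// locked memory is requested, with cudaHostAlloc. Page locked (pinned) host
// memory can be transferred to and from the GPU faster than pageable memory.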
double* BlockSparseMatrix::AllocateValues(int size) {
  if (!use_page_locked_memory_) {
    return new double[size];
  }

#ifndef CERES_NO_CUDA
  double* values = nullptr;
  CHECK_EQ(cudaSuccess,
           cudaHostAlloc(&values, sizeof(double) * size, cudaHostAllocDefault));
  return values;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
  return nullptr;
#endif
}

void BlockSparseMatrix::FreeValues(double*& values) {
  if (!use_page_locked_memory_) {
    delete[] values;
    values = nullptr;
    return;
  }

#ifndef CERES_NO_CUDA
  CHECK_EQ(cudaSuccess, cudaFreeHost(values));
  values = nullptr;
#else
  LOG(FATAL) << "Page locked memory requested when CUDA is not available. "
             << "This is a Ceres bug; please contact the developers!";
#endif
}

}  // namespace ceres::internal