| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445 | // Ceres Solver - A fast non-linear least squares minimizer// Copyright 2023 Google Inc. All rights reserved.// http://ceres-solver.org///// Redistribution and use in source and binary forms, with or without// modification, are permitted provided that the following conditions are met://// * Redistributions of source code must retain the above copyright notice,//   this list of conditions and the following disclaimer.// * Redistributions in binary form must reproduce the above copyright notice,//   this list of conditions and the following disclaimer in the documentation//   and/or other materials provided with the distribution.// * Neither the name of Google Inc. nor the names of its contributors may be//   used to endorse or promote products derived from this software without//   specific prior written permission.//// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE// POSSIBILITY OF SUCH DAMAGE.//// Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)#include <memory>#include <random>#include <string>#include "Eigen/Dense"#include "benchmark/benchmark.h"#include "ceres/block_jacobi_preconditioner.h"#include "ceres/block_sparse_matrix.h"#include "ceres/context_impl.h"#include "ceres/cuda_sparse_matrix.h"#include "ceres/cuda_vector.h"#include "ceres/fake_bundle_adjustment_jacobian.h"#include "ceres/internal/config.h"#include "ceres/internal/eigen.h"#include "ceres/linear_solver.h"#ifndef CERES_NO_CUDA#include "cuda_runtime.h"#endifnamespace ceres::internal {constexpr int kNumCameras = 1000;constexpr int kNumPoints = 10000;constexpr int kCameraSize = 6;constexpr int kPointSize = 3;constexpr double kVisibility = 0.1;constexpr int kNumRowBlocks = 100000;constexpr int kNumColBlocks = 10000;constexpr int kMinRowBlockSize = 1;constexpr int kMaxRowBlockSize = 5;constexpr int kMinColBlockSize = 1;constexpr int kMaxColBlockSize = 15;constexpr double kBlockDensity = 5.0 / kNumColBlocks;static void BM_BlockSparseRightMultiplyAndAccumulateBA(    benchmark::State& state) {  const int num_threads = static_cast<int>(state.range(0));  std::mt19937 prng;  auto jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  ContextImpl context;  context.EnsureMinimumThreads(num_threads);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->RightMultiplyAndAccumulate(        x.data(), y.data(), &context, num_threads);    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA)    ->Arg(1)    ->Arg(2)    ->Arg(4)    ->Arg(8)    ->Arg(16);static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  const int num_threads = static_cast<int>(state.range(0));  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = kNumRowBlocks;  options.num_col_blocks = kNumColBlocks;  options.min_row_block_size = kMinRowBlockSize;  options.min_col_block_size = kMinColBlockSize;  options.max_row_block_size = kMaxRowBlockSize;  options.max_col_block_size = kMaxColBlockSize;  options.block_density = kBlockDensity;  std::mt19937 prng;  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  ContextImpl context;  context.EnsureMinimumThreads(num_threads);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->RightMultiplyAndAccumulate(        x.data(), y.data(), &context, num_threads);    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured)    ->Arg(1)    ->Arg(2)    ->Arg(4)    ->Arg(8)    ->Arg(16);static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) {  std::mt19937 prng;  auto jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA);static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = 100000;  options.num_col_blocks = 10000;  options.min_row_block_size = 1;  options.min_col_block_size = 1;  options.max_row_block_size = 10;  options.max_col_block_size = 15;  options.block_density = 5.0 / options.num_col_blocks;  std::mt19937 prng;  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured);static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) {  const int num_threads = static_cast<int>(state.range(0));  std::mt19937 prng;  auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();  ContextImpl context;  context.EnsureMinimumThreads(num_threads);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->RightMultiplyAndAccumulate(        x.data(), y.data(), &context, num_threads);    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA)    ->Arg(1)    ->Arg(2)    ->Arg(4)    ->Arg(8)    ->Arg(16);static void BM_CRSRightMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  const int num_threads = static_cast<int>(state.range(0));  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = kNumRowBlocks;  options.num_col_blocks = kNumColBlocks;  options.min_row_block_size = kMinRowBlockSize;  options.min_col_block_size = kMinColBlockSize;  options.max_row_block_size = kMaxRowBlockSize;  options.max_col_block_size = kMaxColBlockSize;  options.block_density = kBlockDensity;  std::mt19937 prng;  auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();  ContextImpl context;  context.EnsureMinimumThreads(num_threads);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    jacobian->RightMultiplyAndAccumulate(        x.data(), y.data(), &context, num_threads);    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured)    ->Arg(1)    ->Arg(2)    ->Arg(4)    ->Arg(8)    ->Arg(16);static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) {  std::mt19937 prng;  // Perform setup here  auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    // This code gets timed    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA);static void BM_CRSLeftMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = kNumRowBlocks;  options.num_col_blocks = kNumColBlocks;  options.min_row_block_size = kMinRowBlockSize;  options.min_col_block_size = kMinColBlockSize;  options.max_row_block_size = kMaxRowBlockSize;  options.max_col_block_size = kMaxColBlockSize;  options.block_density = kBlockDensity;  std::mt19937 prng;  auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  double sum = 0;  for (auto _ : state) {    // This code gets timed    jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());    sum += y.norm();  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured);#ifndef CERES_NO_CUDAstatic void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) {  std::mt19937 prng;  auto jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  ContextImpl context;  std::string message;  context.InitCuda(&message);  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);  CudaVector cuda_x(&context, 0);  CudaVector cuda_y(&context, 0);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  cuda_x.CopyFromCpu(x);  cuda_y.CopyFromCpu(y);  double sum = 0;  for (auto _ : state) {    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);    sum += cuda_y.Norm();    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA);static void BM_CudaRightMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = kNumRowBlocks;  options.num_col_blocks = kNumColBlocks;  options.min_row_block_size = kMinRowBlockSize;  options.min_col_block_size = kMinColBlockSize;  options.max_row_block_size = kMaxRowBlockSize;  options.max_col_block_size = kMaxColBlockSize;  options.block_density = kBlockDensity;  std::mt19937 prng;  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  ContextImpl context;  std::string message;  context.InitCuda(&message);  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);  CudaVector cuda_x(&context, 0);  CudaVector cuda_y(&context, 0);  Vector x(jacobian->num_cols());  Vector y(jacobian->num_rows());  x.setRandom();  y.setRandom();  cuda_x.CopyFromCpu(x);  cuda_y.CopyFromCpu(y);  double sum = 0;  for (auto _ : state) {    cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);    sum += cuda_y.Norm();    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured);static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) {  std::mt19937 prng;  auto jacobian = CreateFakeBundleAdjustmentJacobian(      kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);  ContextImpl context;  std::string message;  context.InitCuda(&message);  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);  CudaVector cuda_x(&context, 0);  CudaVector cuda_y(&context, 0);  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  cuda_x.CopyFromCpu(x);  cuda_y.CopyFromCpu(y);  double sum = 0;  for (auto _ : state) {    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);    sum += cuda_y.Norm();    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA);static void BM_CudaLeftMultiplyAndAccumulateUnstructured(    benchmark::State& state) {  BlockSparseMatrix::RandomMatrixOptions options;  options.num_row_blocks = kNumRowBlocks;  options.num_col_blocks = kNumColBlocks;  options.min_row_block_size = kMinRowBlockSize;  options.min_col_block_size = kMinColBlockSize;  options.max_row_block_size = kMaxRowBlockSize;  options.max_col_block_size = kMaxColBlockSize;  options.block_density = kBlockDensity;  std::mt19937 prng;  auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);  ContextImpl context;  std::string message;  context.InitCuda(&message);  auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();  CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);  CudaVector cuda_x(&context, 0);  CudaVector cuda_y(&context, 0);  Vector x(jacobian->num_rows());  Vector y(jacobian->num_cols());  x.setRandom();  y.setRandom();  cuda_x.CopyFromCpu(x);  cuda_y.CopyFromCpu(y);  double sum = 0;  for (auto _ : state) {    cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);    sum += cuda_y.Norm();    CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);  }  CHECK_NE(sum, 0.0);}BENCHMARK(BM_CudaLeftMultiplyAndAccumulateUnstructured);#endif}  // namespace ceres::internalBENCHMARK_MAIN();
 |