123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2023 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Authors: joydeepb@cs.utexas.edu (Joydeep Biswas)
- #include <memory>
- #include <random>
- #include <string>
- #include "Eigen/Dense"
- #include "benchmark/benchmark.h"
- #include "ceres/block_jacobi_preconditioner.h"
- #include "ceres/block_sparse_matrix.h"
- #include "ceres/context_impl.h"
- #include "ceres/cuda_sparse_matrix.h"
- #include "ceres/cuda_vector.h"
- #include "ceres/fake_bundle_adjustment_jacobian.h"
- #include "ceres/internal/config.h"
- #include "ceres/internal/eigen.h"
- #include "ceres/linear_solver.h"
- #ifndef CERES_NO_CUDA
- #include "cuda_runtime.h"
- #endif
- namespace ceres::internal {
// Arguments handed to CreateFakeBundleAdjustmentJacobian by the benchmarks
// whose names end in "BA".
constexpr int kNumCameras = 1'000;
constexpr int kNumPoints = 10'000;
constexpr int kCameraSize = 6;
constexpr int kPointSize = 3;
constexpr double kVisibility = 0.1;

// Values copied into BlockSparseMatrix::RandomMatrixOptions by the
// "Unstructured" benchmarks that reference them.
constexpr int kNumRowBlocks = 100'000;
constexpr int kNumColBlocks = 10'000;
constexpr int kMinRowBlockSize = 1;
constexpr int kMaxRowBlockSize = 5;
constexpr int kMinColBlockSize = 1;
constexpr int kMaxColBlockSize = 15;
// Block density expressed relative to the number of column blocks.
constexpr double kBlockDensity = 5.0 / kNumColBlocks;
- static void BM_BlockSparseRightMultiplyAndAccumulateBA(
- benchmark::State& state) {
- const int num_threads = static_cast<int>(state.range(0));
- std::mt19937 prng;
- auto jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- ContextImpl context;
- context.EnsureMinimumThreads(num_threads);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->RightMultiplyAndAccumulate(
- x.data(), y.data(), &context, num_threads);
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateBA)
- ->Arg(1)
- ->Arg(2)
- ->Arg(4)
- ->Arg(8)
- ->Arg(16);
- static void BM_BlockSparseRightMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- const int num_threads = static_cast<int>(state.range(0));
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = kNumRowBlocks;
- options.num_col_blocks = kNumColBlocks;
- options.min_row_block_size = kMinRowBlockSize;
- options.min_col_block_size = kMinColBlockSize;
- options.max_row_block_size = kMaxRowBlockSize;
- options.max_col_block_size = kMaxColBlockSize;
- options.block_density = kBlockDensity;
- std::mt19937 prng;
- auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- ContextImpl context;
- context.EnsureMinimumThreads(num_threads);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->RightMultiplyAndAccumulate(
- x.data(), y.data(), &context, num_threads);
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_BlockSparseRightMultiplyAndAccumulateUnstructured)
- ->Arg(1)
- ->Arg(2)
- ->Arg(4)
- ->Arg(8)
- ->Arg(16);
- static void BM_BlockSparseLeftMultiplyAndAccumulateBA(benchmark::State& state) {
- std::mt19937 prng;
- auto jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateBA);
- static void BM_BlockSparseLeftMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = 100000;
- options.num_col_blocks = 10000;
- options.min_row_block_size = 1;
- options.min_col_block_size = 1;
- options.max_row_block_size = 10;
- options.max_col_block_size = 15;
- options.block_density = 5.0 / options.num_col_blocks;
- std::mt19937 prng;
- auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_BlockSparseLeftMultiplyAndAccumulateUnstructured);
- static void BM_CRSRightMultiplyAndAccumulateBA(benchmark::State& state) {
- const int num_threads = static_cast<int>(state.range(0));
- std::mt19937 prng;
- auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
- ContextImpl context;
- context.EnsureMinimumThreads(num_threads);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->RightMultiplyAndAccumulate(
- x.data(), y.data(), &context, num_threads);
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CRSRightMultiplyAndAccumulateBA)
- ->Arg(1)
- ->Arg(2)
- ->Arg(4)
- ->Arg(8)
- ->Arg(16);
- static void BM_CRSRightMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- const int num_threads = static_cast<int>(state.range(0));
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = kNumRowBlocks;
- options.num_col_blocks = kNumColBlocks;
- options.min_row_block_size = kMinRowBlockSize;
- options.min_col_block_size = kMinColBlockSize;
- options.max_row_block_size = kMaxRowBlockSize;
- options.max_col_block_size = kMaxColBlockSize;
- options.block_density = kBlockDensity;
- std::mt19937 prng;
- auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
- ContextImpl context;
- context.EnsureMinimumThreads(num_threads);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- jacobian->RightMultiplyAndAccumulate(
- x.data(), y.data(), &context, num_threads);
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CRSRightMultiplyAndAccumulateUnstructured)
- ->Arg(1)
- ->Arg(2)
- ->Arg(4)
- ->Arg(8)
- ->Arg(16);
- static void BM_CRSLeftMultiplyAndAccumulateBA(benchmark::State& state) {
- std::mt19937 prng;
- // Perform setup here
- auto bsm_jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- // This code gets timed
- jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CRSLeftMultiplyAndAccumulateBA);
- static void BM_CRSLeftMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = kNumRowBlocks;
- options.num_col_blocks = kNumColBlocks;
- options.min_row_block_size = kMinRowBlockSize;
- options.min_col_block_size = kMinColBlockSize;
- options.max_row_block_size = kMaxRowBlockSize;
- options.max_col_block_size = kMaxColBlockSize;
- options.block_density = kBlockDensity;
- std::mt19937 prng;
- auto bsm_jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- auto jacobian = bsm_jacobian->ToCompressedRowSparseMatrix();
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- double sum = 0;
- for (auto _ : state) {
- // This code gets timed
- jacobian->LeftMultiplyAndAccumulate(x.data(), y.data());
- sum += y.norm();
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CRSLeftMultiplyAndAccumulateUnstructured);
- #ifndef CERES_NO_CUDA
- static void BM_CudaRightMultiplyAndAccumulateBA(benchmark::State& state) {
- std::mt19937 prng;
- auto jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- ContextImpl context;
- std::string message;
- context.InitCuda(&message);
- auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
- CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
- CudaVector cuda_x(&context, 0);
- CudaVector cuda_y(&context, 0);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- cuda_x.CopyFromCpu(x);
- cuda_y.CopyFromCpu(y);
- double sum = 0;
- for (auto _ : state) {
- cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
- sum += cuda_y.Norm();
- CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CudaRightMultiplyAndAccumulateBA);
- static void BM_CudaRightMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = kNumRowBlocks;
- options.num_col_blocks = kNumColBlocks;
- options.min_row_block_size = kMinRowBlockSize;
- options.min_col_block_size = kMinColBlockSize;
- options.max_row_block_size = kMaxRowBlockSize;
- options.max_col_block_size = kMaxColBlockSize;
- options.block_density = kBlockDensity;
- std::mt19937 prng;
- auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- ContextImpl context;
- std::string message;
- context.InitCuda(&message);
- auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
- CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
- CudaVector cuda_x(&context, 0);
- CudaVector cuda_y(&context, 0);
- Vector x(jacobian->num_cols());
- Vector y(jacobian->num_rows());
- x.setRandom();
- y.setRandom();
- cuda_x.CopyFromCpu(x);
- cuda_y.CopyFromCpu(y);
- double sum = 0;
- for (auto _ : state) {
- cuda_jacobian.RightMultiplyAndAccumulate(cuda_x, &cuda_y);
- sum += cuda_y.Norm();
- CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CudaRightMultiplyAndAccumulateUnstructured);
- static void BM_CudaLeftMultiplyAndAccumulateBA(benchmark::State& state) {
- std::mt19937 prng;
- auto jacobian = CreateFakeBundleAdjustmentJacobian(
- kNumCameras, kNumPoints, kCameraSize, kPointSize, kVisibility, prng);
- ContextImpl context;
- std::string message;
- context.InitCuda(&message);
- auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
- CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
- CudaVector cuda_x(&context, 0);
- CudaVector cuda_y(&context, 0);
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- cuda_x.CopyFromCpu(x);
- cuda_y.CopyFromCpu(y);
- double sum = 0;
- for (auto _ : state) {
- cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
- sum += cuda_y.Norm();
- CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CudaLeftMultiplyAndAccumulateBA);
- static void BM_CudaLeftMultiplyAndAccumulateUnstructured(
- benchmark::State& state) {
- BlockSparseMatrix::RandomMatrixOptions options;
- options.num_row_blocks = kNumRowBlocks;
- options.num_col_blocks = kNumColBlocks;
- options.min_row_block_size = kMinRowBlockSize;
- options.min_col_block_size = kMinColBlockSize;
- options.max_row_block_size = kMaxRowBlockSize;
- options.max_col_block_size = kMaxColBlockSize;
- options.block_density = kBlockDensity;
- std::mt19937 prng;
- auto jacobian = BlockSparseMatrix::CreateRandomMatrix(options, prng);
- ContextImpl context;
- std::string message;
- context.InitCuda(&message);
- auto jacobian_crs = jacobian->ToCompressedRowSparseMatrix();
- CudaSparseMatrix cuda_jacobian(&context, *jacobian_crs);
- CudaVector cuda_x(&context, 0);
- CudaVector cuda_y(&context, 0);
- Vector x(jacobian->num_rows());
- Vector y(jacobian->num_cols());
- x.setRandom();
- y.setRandom();
- cuda_x.CopyFromCpu(x);
- cuda_y.CopyFromCpu(y);
- double sum = 0;
- for (auto _ : state) {
- cuda_jacobian.LeftMultiplyAndAccumulate(cuda_x, &cuda_y);
- sum += cuda_y.Norm();
- CHECK_EQ(cudaDeviceSynchronize(), cudaSuccess);
- }
- CHECK_NE(sum, 0.0);
- }
- BENCHMARK(BM_CudaLeftMultiplyAndAccumulateUnstructured);
- #endif
- } // namespace ceres::internal
- BENCHMARK_MAIN();
|