123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186 |
- // Ceres Solver - A fast non-linear least squares minimizer
- // Copyright 2023 Google Inc. All rights reserved.
- // http://ceres-solver.org/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are met:
- //
- // * Redistributions of source code must retain the above copyright notice,
- // this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above copyright notice,
- // this list of conditions and the following disclaimer in the documentation
- // and/or other materials provided with the distribution.
- // * Neither the name of Google Inc. nor the names of its contributors may be
- // used to endorse or promote products derived from this software without
- // specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- // POSSIBILITY OF SUCH DAMAGE.
- //
- // Authors: vitus@google.com (Michael Vitus),
- // dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
- #ifndef CERES_INTERNAL_PARALLEL_FOR_H_
- #define CERES_INTERNAL_PARALLEL_FOR_H_
- #include <mutex>
- #include <vector>
- #include "ceres/context_impl.h"
- #include "ceres/internal/eigen.h"
- #include "ceres/internal/export.h"
- #include "ceres/parallel_invoke.h"
- #include "ceres/partition_range_for_parallel_for.h"
- #include "glog/logging.h"
- namespace ceres::internal {
// Returns a lock on mutex m, or a default-constructed (unlocked) lock when
// num_threads == 1: with a single thread there is no concurrent access to
// protect, so locking would be pure overhead.
inline std::unique_lock<std::mutex> MakeConditionalLock(const int num_threads,
                                                        std::mutex& m) {
  if (num_threads == 1) {
    // Dummy lock: owns no mutex, destructor is a no-op.
    return std::unique_lock<std::mutex>{};
  }
  return std::unique_lock<std::mutex>{m};
}
- // Execute the function for every element in the range [start, end) with at most
- // num_threads. It will execute all the work on the calling thread if
- // num_threads or (end - start) is equal to 1.
- // Depending on function signature, it will be supplied with either loop index
- // or a range of loop indicies; function can also be supplied with thread_id.
- // The following function signatures are supported:
- // - Functions accepting a single loop index:
- // - [](int index) { ... }
- // - [](int thread_id, int index) { ... }
- // - Functions accepting a range of loop index:
- // - [](std::tuple<int, int> index) { ... }
- // - [](int thread_id, std::tuple<int, int> index) { ... }
- //
- // When distributing workload between threads, it is assumed that each loop
- // iteration takes approximately equal time to complete.
- template <typename F>
- void ParallelFor(ContextImpl* context,
- int start,
- int end,
- int num_threads,
- F&& function,
- int min_block_size = 1) {
- CHECK_GT(num_threads, 0);
- if (start >= end) {
- return;
- }
- if (num_threads == 1 || end - start < min_block_size * 2) {
- InvokeOnSegment(0, std::make_tuple(start, end), std::forward<F>(function));
- return;
- }
- CHECK(context != nullptr);
- ParallelInvoke(context,
- start,
- end,
- num_threads,
- std::forward<F>(function),
- min_block_size);
- }
- // Execute function for every element in the range [start, end) with at most
- // num_threads, using user-provided partitions array.
- // When distributing workload between threads, it is assumed that each segment
- // bounded by adjacent elements of partitions array takes approximately equal
- // time to process.
- template <typename F>
- void ParallelFor(ContextImpl* context,
- int start,
- int end,
- int num_threads,
- F&& function,
- const std::vector<int>& partitions) {
- CHECK_GT(num_threads, 0);
- if (start >= end) {
- return;
- }
- CHECK_EQ(partitions.front(), start);
- CHECK_EQ(partitions.back(), end);
- if (num_threads == 1 || end - start <= num_threads) {
- ParallelFor(context, start, end, num_threads, std::forward<F>(function));
- return;
- }
- CHECK_GT(partitions.size(), 1);
- const int num_partitions = partitions.size() - 1;
- ParallelFor(context,
- 0,
- num_partitions,
- num_threads,
- [&function, &partitions](int thread_id,
- std::tuple<int, int> partition_ids) {
- // partition_ids is a range of partition indices
- const auto [partition_start, partition_end] = partition_ids;
- // Execution over several adjacent segments is equivalent
- // to execution over union of those segments (which is also a
- // contiguous segment)
- const int range_start = partitions[partition_start];
- const int range_end = partitions[partition_end];
- // Range of original loop indices
- const auto range = std::make_tuple(range_start, range_end);
- InvokeOnSegment(thread_id, range, function);
- });
- }
- // Execute function for every element in the range [start, end) with at most
- // num_threads, taking into account user-provided integer cumulative costs of
- // iterations. Cumulative costs of iteration for indices in range [0, end) are
- // stored in objects from cumulative_cost_data. User-provided
- // cumulative_cost_fun returns non-decreasing integer values corresponding to
- // inclusive cumulative cost of loop iterations, provided with a reference to
- // user-defined object. Only indices from [start, end) will be referenced. This
- // routine assumes that cumulative_cost_fun is non-decreasing (in other words,
- // all costs are non-negative);
- // When distributing workload between threads, input range of loop indices will
- // be partitioned into disjoint contiguous intervals, with the maximal cost
- // being minimized.
- // For example, with iteration costs of [1, 1, 5, 3, 1, 4] cumulative_cost_fun
- // should return [1, 2, 7, 10, 11, 15], and with num_threads = 4 this range
- // will be split into segments [0, 2) [2, 3) [3, 5) [5, 6) with costs
- // [2, 5, 4, 4].
- template <typename F, typename CumulativeCostData, typename CumulativeCostFun>
- void ParallelFor(ContextImpl* context,
- int start,
- int end,
- int num_threads,
- F&& function,
- const CumulativeCostData* cumulative_cost_data,
- CumulativeCostFun&& cumulative_cost_fun) {
- CHECK_GT(num_threads, 0);
- if (start >= end) {
- return;
- }
- if (num_threads == 1 || end - start <= num_threads) {
- ParallelFor(context, start, end, num_threads, std::forward<F>(function));
- return;
- }
- // Creating several partitions allows us to tolerate imperfections of
- // partitioning and user-supplied iteration costs up to a certain extent
- constexpr int kNumPartitionsPerThread = 4;
- const int kMaxPartitions = num_threads * kNumPartitionsPerThread;
- const auto& partitions = PartitionRangeForParallelFor(
- start,
- end,
- kMaxPartitions,
- cumulative_cost_data,
- std::forward<CumulativeCostFun>(cumulative_cost_fun));
- CHECK_GT(partitions.size(), 1);
- ParallelFor(
- context, start, end, num_threads, std::forward<F>(function), partitions);
- }
- } // namespace ceres::internal
- #endif // CERES_INTERNAL_PARALLEL_FOR_H_
|