cuda_kernels_bsm_to_crs.h 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  // Ceres Solver - A fast non-linear least squares minimizer
  // Copyright 2023 Google Inc. All rights reserved.
  // http://ceres-solver.org/
  //
  // Redistribution and use in source and binary forms, with or without
  // modification, are permitted provided that the following conditions are met:
  //
  // * Redistributions of source code must retain the above copyright notice,
  // this list of conditions and the following disclaimer.
  // * Redistributions in binary form must reproduce the above copyright notice,
  // this list of conditions and the following disclaimer in the documentation
  // and/or other materials provided with the distribution.
  // * Neither the name of Google Inc. nor the names of its contributors may be
  // used to endorse or promote products derived from this software without
  // specific prior written permission.
  //
  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  // POSSIBILITY OF SUCH DAMAGE.
  //
  // Authors: dmitriy.korchemkin@gmail.com (Dmitriy Korchemkin)
  30. #ifndef CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
  31. #define CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_
  32. #include "ceres/internal/config.h"
  33. #ifndef CERES_NO_CUDA
  34. #include "cuda_runtime.h"
  35. namespace ceres {
  36. namespace internal {
  37. struct Block;
  38. struct Cell;
  39. // Compute structure of CRS matrix using block-sparse structure.
  40. // Arrays corresponding to CRS matrix are to be allocated by caller
  41. void FillCRSStructure(const int num_row_blocks,
  42. const int num_rows,
  43. const int* first_cell_in_row_block,
  44. const Cell* cells,
  45. const Block* row_blocks,
  46. const Block* col_blocks,
  47. int* rows,
  48. int* cols,
  49. cudaStream_t stream,
  50. bool memory_pools_supported);
  51. // Compute structure of partitioned CRS matrix using block-sparse structure.
  52. // Arrays corresponding to CRS matrices are to be allocated by caller
  53. void FillCRSStructurePartitioned(const int num_row_blocks,
  54. const int num_rows,
  55. const int num_row_blocks_e,
  56. const int num_col_blocks_e,
  57. const int num_nonzeros_e,
  58. const int* first_cell_in_row_block,
  59. const Cell* cells,
  60. const Block* row_blocks,
  61. const Block* col_blocks,
  62. int* rows_e,
  63. int* cols_e,
  64. int* rows_f,
  65. int* cols_f,
  66. cudaStream_t stream,
  67. bool memory_pools_supported);
  68. // Permute segment of values from block-sparse matrix with sequential layout to
  69. // CRS order. Segment starts at block_sparse_offset and has length of num_values
  70. void PermuteToCRS(const int block_sparse_offset,
  71. const int num_values,
  72. const int num_row_blocks,
  73. const int* first_cell_in_row_block,
  74. const Cell* cells,
  75. const Block* row_blocks,
  76. const Block* col_blocks,
  77. const int* crs_rows,
  78. const double* block_sparse_values,
  79. double* crs_values,
  80. cudaStream_t stream);
  81. // Permute segment of values from F sub-matrix of block-sparse partitioned
  82. // matrix with sequential layout to CRS order. Segment starts at
  83. // block_sparse_offset (including the offset induced by values of E submatrix)
  84. // and has length of num_values
  85. void PermuteToCRSPartitionedF(const int block_sparse_offset,
  86. const int num_values,
  87. const int num_row_blocks,
  88. const int num_row_blocks_e,
  89. const int* first_cell_in_row_block,
  90. const int* value_offset_row_block_f,
  91. const Cell* cells,
  92. const Block* row_blocks,
  93. const Block* col_blocks,
  94. const int* crs_rows,
  95. const double* block_sparse_values,
  96. double* crs_values,
  97. cudaStream_t stream);
  98. } // namespace internal
  99. } // namespace ceres
  100. #endif // CERES_NO_CUDA
  101. #endif // CERES_INTERNAL_CUDA_KERNELS_BSM_TO_CRS_H_