  1. // Ceres Solver - A fast non-linear least squares minimizer
  2. // Copyright 2023 Google Inc. All rights reserved.
  3. // http://ceres-solver.org/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice,
  9. // this list of conditions and the following disclaimer.
  10. // * Redistributions in binary form must reproduce the above copyright notice,
  11. // this list of conditions and the following disclaimer in the documentation
  12. // and/or other materials provided with the distribution.
  13. // * Neither the name of Google Inc. nor the names of its contributors may be
  14. // used to endorse or promote products derived from this software without
  15. // specific prior written permission.
  16. //
  17. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  18. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20. // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  21. // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22. // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23. // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  24. // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  25. // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  26. // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  27. // POSSIBILITY OF SUCH DAMAGE.
  28. //
  29. // Author: joydeepb@cs.utexas.edu (Joydeep Biswas)
  30. #include "ceres/cuda_kernels_vector_ops.h"
  31. #include <math.h>
  32. #include <limits>
  33. #include <string>
  34. #include <vector>
  35. #include "ceres/context_impl.h"
  36. #include "ceres/cuda_buffer.h"
  37. #include "ceres/internal/config.h"
  38. #include "ceres/internal/eigen.h"
  39. #include "glog/logging.h"
  40. #include "gtest/gtest.h"
  41. namespace ceres {
  42. namespace internal {
  43. #ifndef CERES_NO_CUDA
  44. TEST(CudaFP64ToFP32, SimpleConversions) {
  45. ContextImpl context;
  46. std::string cuda_error;
  47. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  48. std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  49. CudaBuffer<double> fp64_gpu(&context);
  50. fp64_gpu.CopyFromCpuVector(fp64_cpu);
  51. CudaBuffer<float> fp32_gpu(&context);
  52. fp32_gpu.Reserve(fp64_cpu.size());
  53. CudaFP64ToFP32(fp64_gpu.data(),
  54. fp32_gpu.data(),
  55. fp64_cpu.size(),
  56. context.DefaultStream());
  57. std::vector<float> fp32_cpu(fp64_cpu.size());
  58. fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
  59. for (int i = 0; i < fp32_cpu.size(); ++i) {
  60. EXPECT_EQ(fp32_cpu[i], static_cast<float>(fp64_cpu[i]));
  61. }
  62. }
  63. TEST(CudaFP64ToFP32, NumericallyExtremeValues) {
  64. ContextImpl context;
  65. std::string cuda_error;
  66. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  67. std::vector<double> fp64_cpu = {
  68. DBL_MIN, 10.0 * DBL_MIN, DBL_MAX, 0.1 * DBL_MAX};
  69. // First just make sure that the compiler has represented these values
  70. // accurately as fp64.
  71. EXPECT_GT(fp64_cpu[0], 0.0);
  72. EXPECT_GT(fp64_cpu[1], 0.0);
  73. EXPECT_TRUE(std::isfinite(fp64_cpu[2]));
  74. EXPECT_TRUE(std::isfinite(fp64_cpu[3]));
  75. CudaBuffer<double> fp64_gpu(&context);
  76. fp64_gpu.CopyFromCpuVector(fp64_cpu);
  77. CudaBuffer<float> fp32_gpu(&context);
  78. fp32_gpu.Reserve(fp64_cpu.size());
  79. CudaFP64ToFP32(fp64_gpu.data(),
  80. fp32_gpu.data(),
  81. fp64_cpu.size(),
  82. context.DefaultStream());
  83. std::vector<float> fp32_cpu(fp64_cpu.size());
  84. fp32_gpu.CopyToCpu(fp32_cpu.data(), fp32_cpu.size());
  85. EXPECT_EQ(fp32_cpu[0], 0.0f);
  86. EXPECT_EQ(fp32_cpu[1], 0.0f);
  87. EXPECT_EQ(fp32_cpu[2], std::numeric_limits<float>::infinity());
  88. EXPECT_EQ(fp32_cpu[3], std::numeric_limits<float>::infinity());
  89. }
  90. TEST(CudaFP32ToFP64, SimpleConversions) {
  91. ContextImpl context;
  92. std::string cuda_error;
  93. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  94. std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  95. CudaBuffer<float> fp32_gpu(&context);
  96. fp32_gpu.CopyFromCpuVector(fp32_cpu);
  97. CudaBuffer<double> fp64_gpu(&context);
  98. fp64_gpu.Reserve(fp32_cpu.size());
  99. CudaFP32ToFP64(fp32_gpu.data(),
  100. fp64_gpu.data(),
  101. fp32_cpu.size(),
  102. context.DefaultStream());
  103. std::vector<double> fp64_cpu(fp32_cpu.size());
  104. fp64_gpu.CopyToCpu(fp64_cpu.data(), fp64_cpu.size());
  105. for (int i = 0; i < fp64_cpu.size(); ++i) {
  106. EXPECT_EQ(fp64_cpu[i], static_cast<double>(fp32_cpu[i]));
  107. }
  108. }
  109. TEST(CudaSetZeroFP32, NonZeroInput) {
  110. ContextImpl context;
  111. std::string cuda_error;
  112. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  113. std::vector<float> fp32_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  114. CudaBuffer<float> fp32_gpu(&context);
  115. fp32_gpu.CopyFromCpuVector(fp32_cpu);
  116. CudaSetZeroFP32(fp32_gpu.data(), fp32_cpu.size(), context.DefaultStream());
  117. std::vector<float> fp32_cpu_zero(fp32_cpu.size());
  118. fp32_gpu.CopyToCpu(fp32_cpu_zero.data(), fp32_cpu_zero.size());
  119. for (int i = 0; i < fp32_cpu_zero.size(); ++i) {
  120. EXPECT_EQ(fp32_cpu_zero[i], 0.0f);
  121. }
  122. }
  123. TEST(CudaSetZeroFP64, NonZeroInput) {
  124. ContextImpl context;
  125. std::string cuda_error;
  126. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  127. std::vector<double> fp64_cpu = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  128. CudaBuffer<double> fp64_gpu(&context);
  129. fp64_gpu.CopyFromCpuVector(fp64_cpu);
  130. CudaSetZeroFP64(fp64_gpu.data(), fp64_cpu.size(), context.DefaultStream());
  131. std::vector<double> fp64_cpu_zero(fp64_cpu.size());
  132. fp64_gpu.CopyToCpu(fp64_cpu_zero.data(), fp64_cpu_zero.size());
  133. for (int i = 0; i < fp64_cpu_zero.size(); ++i) {
  134. EXPECT_EQ(fp64_cpu_zero[i], 0.0);
  135. }
  136. }
  137. TEST(CudaDsxpy, DoubleValues) {
  138. ContextImpl context;
  139. std::string cuda_error;
  140. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  141. std::vector<float> fp32_cpu_a = {1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  142. std::vector<double> fp64_cpu_b = {
  143. 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0};
  144. CudaBuffer<float> fp32_gpu_a(&context);
  145. fp32_gpu_a.CopyFromCpuVector(fp32_cpu_a);
  146. CudaBuffer<double> fp64_gpu_b(&context);
  147. fp64_gpu_b.CopyFromCpuVector(fp64_cpu_b);
  148. CudaDsxpy(fp64_gpu_b.data(),
  149. fp32_gpu_a.data(),
  150. fp32_gpu_a.size(),
  151. context.DefaultStream());
  152. fp64_gpu_b.CopyToCpu(fp64_cpu_b.data(), fp64_cpu_b.size());
  153. for (int i = 0; i < fp64_cpu_b.size(); ++i) {
  154. EXPECT_DOUBLE_EQ(fp64_cpu_b[i], 2.0 * fp32_cpu_a[i]);
  155. }
  156. }
  157. TEST(CudaDtDxpy, ComputeFourItems) {
  158. ContextImpl context;
  159. std::string cuda_error;
  160. EXPECT_TRUE(context.InitCuda(&cuda_error)) << cuda_error;
  161. std::vector<double> x_cpu = {1, 2, 3, 4};
  162. std::vector<double> y_cpu = {4, 3, 2, 1};
  163. std::vector<double> d_cpu = {10, 20, 30, 40};
  164. CudaBuffer<double> x_gpu(&context);
  165. x_gpu.CopyFromCpuVector(x_cpu);
  166. CudaBuffer<double> y_gpu(&context);
  167. y_gpu.CopyFromCpuVector(y_cpu);
  168. CudaBuffer<double> d_gpu(&context);
  169. d_gpu.CopyFromCpuVector(d_cpu);
  170. CudaDtDxpy(y_gpu.data(),
  171. d_gpu.data(),
  172. x_gpu.data(),
  173. y_gpu.size(),
  174. context.DefaultStream());
  175. y_gpu.CopyToCpu(y_cpu.data(), y_cpu.size());
  176. EXPECT_DOUBLE_EQ(y_cpu[0], 4.0 + 10.0 * 10.0 * 1.0);
  177. EXPECT_DOUBLE_EQ(y_cpu[1], 3.0 + 20.0 * 20.0 * 2.0);
  178. EXPECT_DOUBLE_EQ(y_cpu[2], 2.0 + 30.0 * 30.0 * 3.0);
  179. EXPECT_DOUBLE_EQ(y_cpu[3], 1.0 + 40.0 * 40.0 * 4.0);
  180. }
  181. #endif // CERES_NO_CUDA
  182. } // namespace internal
  183. } // namespace ceres