#pragma once

#include <ATen/native/cuda/KernelUtils.cuh>
#include <ATen/native/GridSamplerUtils.h>

namespace at { namespace native {

using detail::GridSamplerInterpolation;
using detail::GridSamplerPadding;

// Unnormalizes a coordinate from the -1 to +1 scale to its pixel index value,
// where we view each pixel as an area between (idx - 0.5) and (idx + 0.5).
// if align_corners: -1 and +1 get sent to the centers of the corner pixels
//     -1 --> 0
//     +1 --> (size - 1)
//     scale_factor = (size - 1) / 2
// if not align_corners: -1 and +1 get sent to the image edges
//     -1 --> -0.5
//     +1 --> (size - 1) + 0.5 == size - 0.5
//     scale_factor = size / 2
template <typename scalar_t>
static __forceinline__ __device__
scalar_t grid_sampler_unnormalize(scalar_t coord, int size, bool align_corners) {
  if (align_corners) {
    // unnormalize coord from [-1, 1] to [0, size - 1]
    return ((coord + 1.f) / 2) * (size - 1);
  } else {
    // unnormalize coord from [-1, 1] to [-0.5, size - 0.5]
    return ((coord + 1.f) * size - 1) / 2;
  }
}
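
// Worked example: with size = 4,
//   align_corners = true:   -1 -> 0.0,  0 -> 1.5,  +1 -> 3.0
//   align_corners = false:  -1 -> -0.5, 0 -> 1.5,  +1 -> 3.5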

// grid_sampler_unnormalize_set_grad works the same as grid_sampler_unnormalize
// except that it also returns the `d output / d input` via pointer argument
// `grad_in`.
// This is useful in the backward pass of grid_sampler.
template <typename scalar_t>
static __forceinline__ __device__
scalar_t grid_sampler_unnormalize_set_grad(scalar_t coord, int size,
                                           bool align_corners, scalar_t *grad_in) {
  if (align_corners) {
    // unnormalize coord from [-1, 1] to [0, size - 1]
    *grad_in = static_cast<scalar_t>(size - 1) / 2;
    return ((coord + 1.f) / 2) * (size - 1);
  } else {
    // unnormalize coord from [-1, 1] to [-0.5, size - 0.5]
    *grad_in = static_cast<scalar_t>(size) / 2;
    return ((coord + 1.f) * size - 1) / 2;
  }
}
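
// For example, with size = 4 the local derivative d(unnormalized)/d(coord) is
//   align_corners = true:  (size - 1) / 2 = 1.5
//   align_corners = false: size / 2       = 2.0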

// Clips coordinates to between 0 and clip_limit - 1
template <typename scalar_t>
static __forceinline__ __device__
scalar_t clip_coordinates(scalar_t in, int clip_limit) {
  return ::min(static_cast<scalar_t>(clip_limit - 1), ::max(in, static_cast<scalar_t>(0)));
}
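
// Worked example: with clip_limit = 5,
//   clip_coordinates(-2.3, 5) -> 0.0
//   clip_coordinates( 2.3, 5) -> 2.3
//   clip_coordinates( 7.0, 5) -> 4.0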

// clip_coordinates_set_grad works similarly to clip_coordinates except that
// it also returns the `d output / d input` via pointer argument `grad_in`.
// This is useful in the backward pass of grid_sampler.
template <typename scalar_t>
static __forceinline__ __device__
scalar_t clip_coordinates_set_grad(scalar_t in, int clip_limit, scalar_t *grad_in) {
  // Note that it is important for the gradient calculation that borders
  // are considered out of bounds.
  if (in <= static_cast<scalar_t>(0)) {
    *grad_in = static_cast<scalar_t>(0);
    return static_cast<scalar_t>(0);
  } else {
    scalar_t max = static_cast<scalar_t>(clip_limit - 1);
    if (in >= max) {
      *grad_in = static_cast<scalar_t>(0);
      return max;
    } else {
      *grad_in = static_cast<scalar_t>(1);
      return in;
    }
  }
}
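
// Worked example: with clip_limit = 5 the gradient is 1 strictly inside
// (0, 4) and 0 everywhere else, including exactly at the borders:
//   in = 2.3 -> returns 2.3, grad_in = 1
//   in = 0.0 -> returns 0.0, grad_in = 0   (border counts as out of bounds)
//   in = 7.0 -> returns 4.0, grad_in = 0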

// Reflects coordinates until they fall between low and high (inclusive).
// The bounds are passed as twice their value so that half-integer values
// can be represented as ints.
template <typename scalar_t>
static __forceinline__ __device__
scalar_t reflect_coordinates(scalar_t in, int twice_low, int twice_high) {
  if (twice_low == twice_high) {
    return static_cast<scalar_t>(0);
  }
  scalar_t min = static_cast<scalar_t>(twice_low) / 2;
  scalar_t span = static_cast<scalar_t>(twice_high - twice_low) / 2;
  in = ::fabs(in - min);
  // `fmod` returns same sign as `in`, which is positive after the `fabs` above.
  scalar_t extra = ::fmod(in, span);
  int flips = static_cast<int>(::floor(in / span));
  if (flips % 2 == 0) {
    return extra + min;
  } else {
    return span - extra + min;
  }
}
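
// Worked example: reflecting over [0, 3] (twice_low = 0, twice_high = 6):
//   in =  4.5 -> one flip (odd)  -> 3 - 1.5 + 0 = 1.5
//   in = -1.0 -> zero flips after fabs (even) -> 1.0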

// reflect_coordinates_set_grad works similarly to reflect_coordinates except
// that it also returns the `d output / d input` via pointer argument
// `grad_in`.
// This is useful in the backward pass of grid_sampler.
template <typename scalar_t>
static __forceinline__ __device__
scalar_t reflect_coordinates_set_grad(scalar_t in, int twice_low, int twice_high,
                                      scalar_t *grad_in) {
  if (twice_low == twice_high) {
    *grad_in = static_cast<scalar_t>(0);
    return static_cast<scalar_t>(0);
  }
  int grad_in_mult_;
  scalar_t min = static_cast<scalar_t>(twice_low) / 2;
  scalar_t span = static_cast<scalar_t>(twice_high - twice_low) / 2;
  in = in - min;
  if (in < static_cast<scalar_t>(0)) {
    grad_in_mult_ = -1;
    in = -in;
  } else {
    grad_in_mult_ = 1;
  }
  // `fmod` returns same sign as `in`, which is positive after the `if` above.
  scalar_t extra = ::fmod(in, span);
  int flips = static_cast<int>(::floor(in / span));
  if (flips % 2 == 0) {
    *grad_in = static_cast<scalar_t>(grad_in_mult_);
    return extra + min;
  } else {
    *grad_in = static_cast<scalar_t>(-grad_in_mult_);
    return span - extra + min;
  }
}
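
// Worked example: reflecting over [0, 3] (twice_low = 0, twice_high = 6):
//   in =  4.5 -> returns 1.5, grad_in = -1 (one flip reverses direction)
//   in = -1.0 -> returns 1.0, grad_in = -1 (negation, then zero flips)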

template<typename scalar_t>
static __forceinline__ __device__
scalar_t safe_downgrade_to_int_range(scalar_t x){
  // -100.0 does not have special meaning. This is just to make sure
  // it's not within_bounds_2d or within_bounds_3d, and does not cause
  // undefined behavior. See #35506.
  if (x > INT_MAX-1 || x < INT_MIN || !::isfinite(static_cast<double>(x)))
    return static_cast<scalar_t>(-100.0);
  return x;
}
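
// For example, NaN, +/-Inf, and values such as 1e20 that would overflow when
// cast to int all map to -100.0, which later fails the within_bounds checks.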

template<typename scalar_t>
static __forceinline__ __device__
scalar_t compute_coordinates(scalar_t coord, int size,
                             GridSamplerPadding padding_mode,
                             bool align_corners) {
  if (padding_mode == GridSamplerPadding::Border) {
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    // reflect coordinates by image borders
    if (align_corners) {
      coord = reflect_coordinates(coord, 0, 2*(size - 1));
    } else {
      coord = reflect_coordinates(coord, -1, 2*size - 1);
    }
    // clip coordinates to image borders
    coord = clip_coordinates(coord, size);
  }
  coord = safe_downgrade_to_int_range(coord);
  return coord;
}
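
// The twice-the-bound arguments above encode the reflection interval: with
// align_corners the interval is [0, size - 1] (pixel centers), otherwise it
// is [-0.5, size - 0.5] (pixel edges), e.g. [-0.5, 3.5] for size = 4.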

// Computes the pixel source index value for a grid coordinate
template <typename scalar_t>
static __forceinline__ __device__
scalar_t grid_sampler_compute_source_index(
    scalar_t coord,
    int size,
    GridSamplerPadding padding_mode,
    bool align_corners) {
  coord = grid_sampler_unnormalize(coord, size, align_corners);
  coord = compute_coordinates(coord, size, padding_mode, align_corners);
  return coord;
}
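
// Worked example: size = 4, align_corners = false, Reflection padding,
// coord = 1.2:
//   unnormalize: ((1.2 + 1) * 4 - 1) / 2 = 3.9
//   reflect over [-0.5, 3.5]: 3.9 -> 3.1
//   clip to [0, 3]: 3.1 -> 3.0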

// grid_sampler_compute_source_index_set_grad works similarly to
// grid_sampler_compute_source_index except that it also returns the
// `d output / d input` via pointer argument `grad_in`.
// This is useful in the backward pass of grid_sampler.
template <typename scalar_t>
static __forceinline__ __device__
scalar_t grid_sampler_compute_source_index_set_grad(
    scalar_t coord,
    int size,
    GridSamplerPadding padding_mode,
    bool align_corners,
    scalar_t *grad_in) {
  scalar_t grad_clip, grad_refl;
  coord = grid_sampler_unnormalize_set_grad(coord, size, align_corners, grad_in);
  if (padding_mode == GridSamplerPadding::Border) {
    // clip coordinates to image borders
    coord = clip_coordinates_set_grad(coord, size, &grad_clip);
    *grad_in = (*grad_in) * grad_clip;
  } else if (padding_mode == GridSamplerPadding::Reflection) {
    // reflect coordinates by image borders
    if (align_corners) {
      coord = reflect_coordinates_set_grad(coord, 0, 2*(size - 1), &grad_refl);
    } else {
      coord = reflect_coordinates_set_grad(coord, -1, 2*size - 1, &grad_refl);
    }
    // clip coordinates to image borders
    coord = clip_coordinates_set_grad(coord, size, &grad_clip);
    *grad_in = (*grad_in) * grad_refl * grad_clip;
  }
  coord = safe_downgrade_to_int_range(coord);
  return coord;
}
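
// The returned gradient is the chain-rule product of the stages above. For
// example, with size = 4, align_corners = false, Reflection padding, and
// coord = 0.6: unnormalize gives 2.7 with grad 2.0, no flip occurs
// (grad_refl = 1) and 2.7 is strictly inside [0, 3] (grad_clip = 1), so
// *grad_in = 2.0 * 1 * 1 = 2.0.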

static __forceinline__ __device__
bool within_bounds_2d(int h, int w, int H, int W) {
  return h >= 0 && h < H && w >= 0 && w < W;
}

static __forceinline__ __device__
bool within_bounds_3d(int d, int h, int w, int D, int H, int W) {
  return d >= 0 && d < D && h >= 0 && h < H && w >= 0 && w < W;
}

template<typename scalar_t>
static __forceinline__ __device__
scalar_t get_value_bounded(
    scalar_t *data, scalar_t x, scalar_t y, int W, int H, int sW, int sH,
    GridSamplerPadding padding_mode,
    bool align_corners) {
  x = compute_coordinates(x, W, padding_mode, align_corners);
  y = compute_coordinates(y, H, padding_mode, align_corners);

  int ix = static_cast<int>(x);
  int iy = static_cast<int>(y);

  if (within_bounds_2d(iy, ix, H, W)) {
    return data[iy * sH + ix * sW];
  }
  return static_cast<scalar_t>(0);
}
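
// safe_add_2d / safe_add_3d below perform a bounds-checked scatter-add used
// when accumulating input gradients in the backward pass. fastAtomicAdd
// (from ATen/native/cuda/KernelUtils.cuh) takes the base pointer, a flat
// index, and the total memory span so it can perform the atomic add safely
// (and, for half precision, more efficiently than a plain atomicAdd).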

template<typename scalar_t, typename index_t>
static __forceinline__ __device__
void safe_add_2d(scalar_t *data, int h, int w,
                 int sH, int sW, int H, int W,
                 scalar_t delta,
                 const index_t NC_offset,
                 const index_t memory_span) {
  if (within_bounds_2d(h, w, H, W)) {
    fastAtomicAdd(data,
                  NC_offset + h * sH + w * sW,
                  memory_span,
                  delta,
                  true);
  }
}

template<typename scalar_t, typename index_t>
static __forceinline__ __device__
void safe_add_3d(scalar_t *data, int d, int h, int w,
                 int sD, int sH, int sW, int D, int H, int W,
                 scalar_t delta,
                 const index_t NC_offset,
                 const index_t memory_span) {
  if (within_bounds_3d(d, h, w, D, H, W)) {
    fastAtomicAdd(data,
                  NC_offset + d * sD + h * sH + w * sW,
                  memory_span,
                  delta,
                  true);
  }
}

template<typename scalar_t, typename index_t>
static __forceinline__ __device__
void add_value_bounded(
    scalar_t* data, scalar_t x, scalar_t y, int W, int H, int sW, int sH,
    scalar_t delta,
    GridSamplerPadding padding_mode,
    bool align_corners,
    const index_t NC_offset,
    const index_t memory_span) {
  x = compute_coordinates(x, W, padding_mode, align_corners);
  y = compute_coordinates(y, H, padding_mode, align_corners);

  int ix = static_cast<int>(x);
  int iy = static_cast<int>(y);

  safe_add_2d(data, iy, ix, sH, sW, H, W, delta, NC_offset, memory_span);
}
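
// get_value_bounded and add_value_bounded form the padded read/write pair
// used by the bicubic sampling path: the former gathers a neighbor value in
// the forward pass, the latter scatters a gradient contribution in the
// backward pass, both after applying the same padding logic to the
// coordinates.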

// Calculate the differential of the cubic convolution, i.e. `d coeff / d x`
template<typename scalar_t>
static __forceinline__ __device__
void get_cubic_coefficients_grad(
    scalar_t coeffs[4],
    scalar_t t) {

  // Must be the same as forward calculation in
  // aten/src/ATen/native/cuda/UpSample.cuh:get_cubic_upsample_coefficients
  scalar_t A = -0.75;

  scalar_t x;
  x = -1 - t; // 1 < x = |-1 - tx| < 2
  coeffs[0] = (-3 * A * x - 10 * A) * x - 8 * A;
  x = -t;     // x = |0 - tx| <= 1
  coeffs[1] = (-3 * (A + 2) * x - 2 * (A + 3)) * x;
  x = 1 - t;  // x = |1 - tx| <= 1
  coeffs[2] = (3 * (A + 2) * x - 2 * (A + 3)) * x;
  x = 2 - t;  // 1 < x = |2 - tx| < 2
  coeffs[3] = (3 * A * x - 10 * A) * x + 8 * A;
}
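
// For reference, the forward coefficients are W(1 + t), W(t), W(1 - t), and
// W(2 - t), where W is the Keys cubic convolution kernel with A = -0.75:
//   W(x) = (A + 2)|x|^3 - (A + 3)|x|^2 + 1   for |x| <= 1
//   W(x) = A|x|^3 - 5A|x|^2 + 8A|x| - 4A     for 1 < |x| < 2
// Each coeffs[i] above is dW/dx evaluated at the signed offset shown in the
// corresponding comment (e.g. coeffs[0] = dW/dx at x = -1 - t).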

}} // namespace at::native