TensorIteratorDynamicCasting.h 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455
  1. #pragma once
  2. #include <complex>
  3. #include <type_traits>
  4. #include <c10/core/ScalarType.h>
  5. #include <c10/util/C++17.h>
  6. #include <ATen/detail/FunctionTraits.h>
  7. #include <ATen/native/TensorIterator.h>
  8. // This file includes utilities for dynamic_casting done by TensorIterator, see CUDALoops.cuh and Loops.h.
  9. // dynamic_casting handles when the types expected by the iterator do not match the types of the arguments
  10. // to the function that is being called.
  11. // On CUDA, the cast is currently pushed down into the kernel (for performance reasons).
  12. // On CPU, there is currently an internal assert that a dynamic_cast is not needed.
  13. namespace at { namespace native {
  14. // `needs_dynamic_casting` compares the types expected by iterator
  15. // (i.e. dtypes of the operands) with the actual type of the arguments
  16. // (and returns) of func_t
  17. template<typename func_t, int nargs=function_traits<func_t>::arity>
  18. struct needs_dynamic_casting {
  19. static bool check(TensorIteratorBase& iter) {
  20. using traits = function_traits<func_t>;
  21. using cpp_type = typename traits::template arg<nargs - 1>::type;
  22. using cpp_map = c10::CppTypeToScalarType<cpp_type>;
  23. if (iter.input_dtype(nargs-1) != cpp_map::value) {
  24. return true;
  25. }
  26. return needs_dynamic_casting<func_t, nargs - 1>::check(iter);
  27. }
  28. };
  29. template<typename func_t>
  30. struct needs_dynamic_casting<func_t, 0> {
  31. static bool check(TensorIteratorBase& iter) {
  32. using traits = function_traits<func_t>;
  33. using cpp_type = typename traits::result_type;
  34. // we could assert output numbers are correct here, but checks
  35. // (including arity) are currently pushed outside of this struct.
  36. return c10::guts::if_constexpr<std::is_void<cpp_type>::value>([]() {
  37. return false;
  38. }, /* else */ [&](auto _) {
  39. // decltype(_) is used to delay computation
  40. using delayed_type = typename decltype(_)::template type_identity<cpp_type>;
  41. return iter.dtype(0) != c10::CppTypeToScalarType<delayed_type>::value;
  42. });
  43. }
  44. };
  45. }} //namespace at::native