// AffineQuantizerBase.h

#pragma once

#include <c10/macros/Export.h>
#include <c10/core/ScalarType.h>

#include <cstddef>
#include <cstdint>
  4. namespace at {
  5. namespace native {
  6. // Quantize a float value into a uint value given scale and zero_point
  7. template <typename T>
  8. TORCH_API T quantize_val(double scale, int64_t zero_point, float value);
  9. // TODO combine this with quantize_val once the numerics for ARM are aligned
  10. // with it
  11. template <typename T>
  12. T quantize_val_arm(
  13. const float scale,
  14. const int32_t zero_point,
  15. const float value);
  16. template <typename T, int precision = 8>
  17. void quantize_vec(
  18. double scale,
  19. int64_t zero_point,
  20. const float* src,
  21. T* dst,
  22. size_t count = 8);
  23. template <typename T>
  24. TORCH_API float dequantize_val(double scale, int64_t zero_point, T value);
  25. template <typename T>
  26. TORCH_API float dequantize_vec(
  27. double scale,
  28. int64_t zero_point,
  29. const T* src,
  30. float* dst,
  31. size_t count = 8);
  32. template <typename SRC_T, typename DST_T>
  33. TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src);
  34. // Given a multiplier and a zero_point, requantize int32_t computed values back
  35. // to quantized values. See comment above
  36. // make_per_tensor_affine_quantizer function for the usage of int64_t
  37. template <typename DST_T>
  38. TORCH_API DST_T
  39. requantize_from_int(double multiplier, int64_t zero_point, int64_t src);
  40. int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax);
  41. } // namespace native
  42. } // namespace at