#pragma once

#include <c10/macros/Export.h>
#include <c10/core/ScalarType.h>

namespace at {
namespace native {

// Quantize a float value into a uint value given scale and zero_point
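// (per-tensor affine quantization: q = clamp(round(value / scale) + zero_point, qmin, qmax);
// e.g. with scale = 0.1 and zero_point = 10, a value of 2.5f quantizes to 25 + 10 = 35)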
template <typename T>
TORCH_API T quantize_val(double scale, int64_t zero_point, float value);
// TODO combine this with quantize_val once the numerics for ARM are aligned
// with it
template <typename T>
T quantize_val_arm(
    const float scale,
    const int32_t zero_point,
    const float value);
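
// Quantize `count` float values from `src` into `dst` using the same affine
// mapping; `precision` presumably gives the bit width of the quantized output.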
template <typename T, int precision = 8>
void quantize_vec(
    double scale,
    int64_t zero_point,
    const float* src,
    T* dst,
    size_t count = 8);
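
// Dequantize a quantized value back to float: (value - zero_point) * scale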
template <typename T>
TORCH_API float dequantize_val(double scale, int64_t zero_point, T value);
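
// Dequantize `count` quantized values from `src` into the float buffer `dst`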
template <typename T>
TORCH_API float dequantize_vec(
    double scale,
    int64_t zero_point,
    const T* src,
    float* dst,
    size_t count = 8);
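
// Re-quantize a value from one quantized type to another; the unnamed
// parameters are presumably the source scale/zero_point followed by the
// destination scale/zero_point.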
template <typename SRC_T, typename DST_T>
TORCH_API DST_T requantize_val(double, int64_t, double, int64_t, SRC_T src);

// Given a multiplier and a zero_point, requantize int32_t computed values back
// to quantized values. See comment above
// make_per_tensor_affine_quantizer function for the usage of int64_t
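// (roughly q = clamp(round(multiplier * src) + zero_point, qmin, qmax))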
template <typename DST_T>
TORCH_API DST_T
requantize_from_int(double multiplier, int64_t zero_point, int64_t src);
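
// Quantize a float value given float scale/zero_point and explicit qmin/qmax
// bounds (quantization parameters kept as float rather than double/int64_t)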
int quantize_val_float_qparams(float scale, float zero_point, float value, int qmin, int qmax);

} // namespace native
} // namespace at