// Quantizer.h
  1. #pragma once
  2. #include <c10/core/QScheme.h>
  3. #include <c10/core/MemoryFormat.h>
  4. #include <c10/macros/Macros.h>
  5. #include <c10/util/Exception.h>
  6. #include <c10/util/intrusive_ptr.h>
  7. #include <c10/core/ScalarType.h>
  8. #include <c10/core/TensorOptions.h>
  9. #include <ATen/Tensor.h>
  10. #include <ATen/TensorUtils.h>
  11. #include <ATen/core/QuantizerBase.h>
  12. #include <cmath>
  13. #include <memory>
  14. #include <utility>
  15. namespace at {
/**
 * UnknownQuantizer is a placeholder quantizer for functions that implement
 * quantization in a two step process. First a tensor is allocated but with
 * unknown quantizer, and then the quantization kernel decides what the final
 * quantizer will be.
 */
struct TORCH_API UnknownQuantizer : public Quantizer {
  explicit UnknownQuantizer(ScalarType scalar_type)
    : Quantizer(scalar_type) {}

  // All quantization ops are declared here and defined out of line; the
  // concrete behavior is only decided once a real quantizer is attached.
  Tensor quantize(const Tensor& tensor) override;
  Tensor dequantize(const Tensor& qtensor) override;
  Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  QScheme qscheme() const override;
  bool equalTo(QuantizerPtr other) const override;
};
/**
 * UniformQuantizer is the parent class for all uniform quantizers.
 * These quantization schemes map float values uniformly to
 * the quantized values. For example, affine quantizer is
 * the most commonly used scheme in this category.
 */
struct TORCH_API UniformQuantizer : public Quantizer {
  explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
/**
 * NonUniformQuantizer is the parent class for all non-uniform quantizers.
 * These quantization schemes may map float values non-uniformly to the
 * quantized values. K-means quantization is a representative example in this
 * category.
 */
struct TORCH_API NonUniformQuantizer : public Quantizer {
  explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {}
};
// There is also StochasticQuantizer which is uniform but not affine

/**
 * AffineQuantizer uses affine transformation to do quantization.
 *
 * For quantize:
 * Y = clamp(round(X / scale + zero_point), min, max)
 * For dequantize:
 * X = (Y - zero_point) * scale
 */
struct TORCH_API AffineQuantizer : public UniformQuantizer {
  explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {}
};
  60. // Note that we will not have Symmetric Quantizer in backend to reduce
  61. // complications in quantized kernel implementation.
  62. /**
  63. * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for
  64. * all the values in the Tensor.
  65. */
  66. struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer {
  67. explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point)
  68. : AffineQuantizer(scalar_type),
  69. scale_(scale),
  70. zero_point_(zero_point) {}
  71. Tensor quantize(const Tensor& tensor) override;
  72. Tensor dequantize(const Tensor& qtensor) override;
  73. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  74. QScheme qscheme() const override {
  75. return kPerTensorAffine;
  76. }
  77. double scale() const {
  78. return scale_;
  79. }
  80. int64_t zero_point() const {
  81. return zero_point_;
  82. }
  83. bool equalTo(QuantizerPtr other) const override {
  84. if (!other.get() || other->qscheme() != kPerTensorAffine) {
  85. return false;
  86. }
  87. auto* other_per_tensor_affine =
  88. static_cast<PerTensorAffineQuantizer*>(other.get());
  89. return scalar_type() == other_per_tensor_affine->scalar_type() &&
  90. scale() == other_per_tensor_affine->scale() &&
  91. zero_point() == other_per_tensor_affine->zero_point();
  92. }
  93. private:
  94. const double scale_;
  95. // We use int64_t for consistency with Python
  96. const int64_t zero_point_;
  97. };
  98. /**
  99. * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer
  100. * except that we have an independent scale and zero_point parameter
  101. * for each channel.
  102. *
  103. * Also note that per channel quantization is mostly applied to output channels
  104. * of weights since per-input channel of weight quantization or per-channel
  105. * quantization for activations can't be efficiently supported in most of
  106. * processors since it requires each multiplication result within a single
  107. * dot-product to have a different scale.
  108. */
  109. struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer {
  110. explicit PerChannelAffineQuantizer(
  111. ScalarType scalar_type,
  112. Tensor scales,
  113. Tensor zero_points,
  114. int64_t axis)
  115. : AffineQuantizer(scalar_type),
  116. scales_(std::move(scales)),
  117. zero_points_(std::move(zero_points)),
  118. axis_(axis) {}
  119. QScheme qscheme() const override {
  120. return kPerChannelAffine;
  121. }
  122. Tensor scales() const {
  123. return scales_;
  124. }
  125. Tensor zero_points() const {
  126. return zero_points_;
  127. }
  128. int64_t axis() const {
  129. return axis_;
  130. }
  131. Tensor quantize(const Tensor& tensor) override;
  132. Tensor dequantize(const Tensor& qtensor) override;
  133. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  134. bool equalTo(QuantizerPtr other) const override {
  135. if (!other.get() || other->qscheme() != kPerChannelAffine) {
  136. return false;
  137. }
  138. auto* other_per_channel_affine =
  139. static_cast<PerChannelAffineQuantizer*>(other.get());
  140. return scalar_type() == other_per_channel_affine->scalar_type() &&
  141. scales().equal(other_per_channel_affine->scales()) &&
  142. zero_points().equal(other_per_channel_affine->zero_points()) &&
  143. axis() == other_per_channel_affine->axis();
  144. }
  145. protected:
  146. Tensor scales_;
  147. Tensor zero_points_;
  148. const int64_t axis_;
  149. };
  150. /**
  151. * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer
  152. * except that it expects both scale and zero point to be floating point values.
  153. *
  154. * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of
  155. * kPerChannelAffine.
  156. *
  157. * The quantize equation in this case looks like -
  158. * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale
  159. *
  160. * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to
  161. * be exactly represented in the quantized space. We can get additional precision by
  162. * using floating point values for zero point.
  163. */
  164. struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer {
  165. explicit PerChannelAffineFloatQParamsQuantizer(
  166. ScalarType scalar_type,
  167. Tensor scales,
  168. Tensor zero_points,
  169. int64_t axis)
  170. : PerChannelAffineQuantizer(scalar_type,
  171. scales,
  172. zero_points,
  173. axis) {}
  174. QScheme qscheme() const override {
  175. return kPerChannelAffineFloatQParams;
  176. }
  177. Tensor quantize(const Tensor& tensor) override;
  178. Tensor dequantize(const Tensor& qtensor) override;
  179. Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override;
  180. bool equalTo(QuantizerPtr other) const override {
  181. if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) {
  182. return false;
  183. }
  184. auto* other_per_channel_float_qparams =
  185. static_cast<PerChannelAffineFloatQParamsQuantizer*>(other.get());
  186. return scalar_type() == other_per_channel_float_qparams->scalar_type() &&
  187. scales().equal(other_per_channel_float_qparams->scales()) &&
  188. zero_points().equal(other_per_channel_float_qparams->zero_points()) &&
  189. axis() == other_per_channel_float_qparams->axis();
  190. }
  191. };
// This is an internal utility function for getting at the QTensorImpl,
// You should only use this for writing low level
// setters/getters for QTensorImpl fields; otherwise, you should use
// the low level setters/getters that were implemented using this.
// This may be called repeatedly, so make sure it's pretty cheap.
TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self);

// double and int64_t are because of the native function API, we only have these
// argument types right now in native functions
TORCH_API QuantizerPtr
make_per_tensor_affine_quantizer(
    double scale, int64_t zero_point, ScalarType scalar_type);

// Factory for a per-channel affine quantizer; `axis` is the dimension the
// `scales`/`zero_points` tensors are indexed along.
TORCH_API QuantizerPtr make_per_channel_affine_quantizer(
    const Tensor& scales,
    const Tensor& zero_points,
    int64_t axis,
    ScalarType scalar_type);

// Factory for the placeholder UnknownQuantizer used in two-step quantization.
TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);

// Create a Quantized Tensor given arguments for normal Tensor and a quantizer
TORCH_API Tensor new_qtensor(
    IntArrayRef sizes,
    const TensorOptions& options,
    QuantizerPtr quantizer);

// Attaches `quantizer` to `self` (in-place, per the trailing-underscore
// naming convention).
TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);

// Wraps externally owned memory as a per-tensor-affine quantized tensor with
// explicit strides; `deleter` is called when the tensor gives up the buffer.
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
    void* data,
    IntArrayRef sizes,
    IntArrayRef strides,
    std::function<void(void*)> deleter,
    const float scale,
    const int64_t zeroPoint,
    const TensorOptions& options);

// Overload without explicit strides — presumably uses a default (contiguous)
// layout; confirm against the definition.
TORCH_API Tensor from_blob_quantized_per_tensor_affine(
    void* data,
    IntArrayRef sizes,
    std::function<void(void*)> deleter,
    const float scale,
    const int64_t zeroPoint,
    const TensorOptions& options);

// Per-channel counterpart of the from_blob_quantized overloads above.
TORCH_API Tensor from_blob_quantized_per_channel_affine(
    void* data,
    IntArrayRef sizes,
    std::function<void(void*)> deleter,
    const Tensor& scales,
    const Tensor& zero_points,
    const int64_t axis,
    const TensorOptions& options);
  238. } // namespace at