fbgemm.py

import torch

from ._common_operator_config_utils import (
    _get_binary_op_configs,
    _get_bn_configs,
    _get_cat_config,
    _get_conv_configs,
    _get_default_op_configs,
    _get_embedding_op_configs,
    _get_fixed_qparams_op_configs,
    _get_linear_configs,
    _get_rnn_op_configs,
    _get_share_qparams_op_configs,
    _get_tensor_info_op_configs,
)
from .backend_config import BackendConfig, DTypeConfig

__all__ = [
    "get_fbgemm_backend_config",
]

# ===================
# |  DTYPE CONFIGS  |
# ===================

# TODO: For now, these DTypeConfigs are identical to the ones defined in native.py
# In the future, once we support specifying quant_min/quant_max and scale_min/scale_max,
# these will diverge. In particular, for FBGEMM, we will restrict the activation quantized
# values to within [0, 127].
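
# Static quantization for weighted ops (e.g. conv, linear): quint8
# activations, qint8 weights, and a float bias.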
fbgemm_weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)
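
# Static quantization for ops without weights: quint8 activations in and out.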
fbgemm_default_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
)
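
# End-to-end float16: float16 activations, weights, and bias.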
fbgemm_default_op_fp16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float16,
    weight_dtype=torch.float16,
    bias_dtype=torch.float16,
)
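
# Dynamic quantization: activations are quantized to quint8 at runtime,
# weights are statically quantized to qint8, and the output stays float.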
fbgemm_default_dynamic_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.float,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
    is_dynamic=True,
)
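
# Dynamic float16: weights are stored in float16, activations are converted
# at runtime, and the output stays float.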
fbgemm_default_dynamic_float16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float,
    weight_dtype=torch.float16,
    bias_dtype=torch.float,
    is_dynamic=True,
)
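
# Weight-only quantization: float activations with quint8 weights, used
# below for embedding ops.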
fbgemm_weight_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint8,
)
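
# Weight-only 4-bit quantization: quint4x2 packs two 4-bit values per byte,
# also used below for embedding ops.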
fbgemm_weight_only_quint4x2_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint4x2,
)

# =====================
# |  BACKEND CONFIGS  |
# =====================

def get_fbgemm_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch's native FBGEMM backend.
    """
    conv_dtype_configs = [fbgemm_weighted_op_quint8_dtype_config]
    linear_dtype_configs = [
        fbgemm_weighted_op_quint8_dtype_config,
        fbgemm_default_dynamic_int8_dtype_config,
        fbgemm_default_dynamic_float16_dtype_config,
    ]
    binary_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    default_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    fixed_qparams_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    share_qparams_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    tensor_info_op_dtype_configs = [fbgemm_default_op_quint8_dtype_config]
    rnn_op_dtype_configs = [
        fbgemm_default_dynamic_int8_dtype_config,
        fbgemm_default_dynamic_float16_dtype_config,
    ]
    embedding_op_dtype_configs = [
        fbgemm_weight_only_quint8_dtype_config,
        fbgemm_weight_only_quint4x2_dtype_config,
    ]
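    # Register the supported patterns for each operator family on a single
    # BackendConfig, each family with its own set of dtype configs.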
    return BackendConfig("fbgemm") \
        .set_backend_pattern_configs(_get_conv_configs(conv_dtype_configs)) \
        .set_backend_pattern_configs(_get_linear_configs(linear_dtype_configs)) \
        .set_backend_pattern_configs(_get_binary_op_configs(binary_op_dtype_configs)) \
        .set_backend_pattern_config(_get_cat_config(default_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_default_op_configs(default_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_bn_configs(default_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_rnn_op_configs(rnn_op_dtype_configs)) \
        .set_backend_pattern_configs(_get_embedding_op_configs(embedding_op_dtype_configs))
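
# Usage sketch (illustrative, not part of the original module): wiring this
# BackendConfig into FX graph mode quantization. `prepare_fx`, `convert_fx`,
# and `get_default_qconfig_mapping` are the standard torch.ao.quantization
# APIs; the toy Linear model and calibration input are placeholders.
#
#     from torch.ao.quantization import get_default_qconfig_mapping
#     from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx
#
#     model = torch.nn.Sequential(torch.nn.Linear(16, 8)).eval()
#     example_inputs = (torch.randn(1, 16),)
#     prepared = prepare_fx(
#         model,
#         get_default_qconfig_mapping("fbgemm"),
#         example_inputs,
#         backend_config=get_fbgemm_backend_config(),
#     )
#     prepared(*example_inputs)  # calibrate on representative data
#     quantized = convert_fx(prepared, backend_config=get_fbgemm_backend_config())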