123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204 |
- import torch
- from ._common_operator_config_utils import (
- _get_binary_op_configs,
- _get_bn_configs,
- _get_cat_config,
- _get_conv_configs,
- _get_default_op_configs,
- _get_embedding_op_configs,
- _get_fixed_qparams_op_configs,
- _get_linear_configs,
- _get_ln_configs,
- _get_rnn_op_configs,
- _get_share_qparams_op_configs,
- _get_tensor_info_op_configs,
- )
- from .backend_config import BackendConfig, DTypeConfig
# Names exported by a star-import of this module.
__all__ = [
    "get_test_only_legacy_native_backend_config",
    "default_op_quint8_dtype_config",
    "default_op_fp16_dtype_config",
    "default_dynamic_int8_dtype_config",
    "default_dynamic_float16_dtype_config",
    "input_output_only_quint8_dtype_config",
    "weight_only_quint8_dtype_config",
    "weight_only_quint4x2_dtype_config",
    "get_native_backend_config",
    "get_native_backend_config_dict",
    "get_test_only_legacy_native_backend_config_dict",
]
# ===================
# |  DTYPE CONFIGS  |
# ===================

# Int8 config for ops that carry quantized weights, such as linear and conv.
weighted_op_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
)

# quint8 input and output; no weight or bias dtype is specified.
default_op_quint8_dtype_config = DTypeConfig(input_dtype=torch.quint8, output_dtype=torch.quint8)

# fp16 for input, output, weight and bias alike.
default_op_fp16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float16,
    weight_dtype=torch.float16,
    bias_dtype=torch.float16,
)

# NOTE (applies to both dynamic configs below): the dtype check is not enabled yet,
# so these dtype configs are provided but not really used. The check will be enabled
# a bit later, once everything has been moved to backend_config_dict.
default_dynamic_int8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.float,
    weight_dtype=torch.qint8,
    bias_dtype=torch.float,
    is_dynamic=True,
)

default_dynamic_float16_dtype_config = DTypeConfig(
    input_dtype=torch.float16,
    output_dtype=torch.float,
    weight_dtype=torch.float16,
    bias_dtype=torch.float,
    is_dynamic=True,
)

# Used for LayerNorm and f.layer_norm, because the kernel currently only supports
# float weights.
input_output_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.quint8,
    output_dtype=torch.quint8,
    weight_dtype=torch.float,
    bias_dtype=torch.float,
)

# Float input and output with a quint8 weight.
weight_only_quint8_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint8,
)

# Float input and output with a quint4x2 weight.
weight_only_quint4x2_dtype_config = DTypeConfig(
    input_dtype=torch.float,
    output_dtype=torch.float,
    weight_dtype=torch.quint4x2,
)
# =====================
# |  BACKEND CONFIGS  |
# =====================

def get_test_only_legacy_native_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) with various additional fp16 ops.
    """
    # Dtype configs accepted by each family of ops. Every family gets its own list object.
    conv_dtypes = [weighted_op_quint8_dtype_config]
    linear_dtypes = [
        weighted_op_quint8_dtype_config,
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
        default_op_fp16_dtype_config,
    ]
    binary_dtypes = [default_op_quint8_dtype_config, default_op_fp16_dtype_config]
    default_dtypes = [default_op_quint8_dtype_config]
    fixed_qparams_dtypes = [default_op_quint8_dtype_config, default_op_fp16_dtype_config]
    share_qparams_dtypes = [default_op_quint8_dtype_config, default_op_fp16_dtype_config]
    tensor_info_dtypes = [default_op_quint8_dtype_config]
    rnn_dtypes = [default_dynamic_int8_dtype_config, default_dynamic_float16_dtype_config]
    embedding_dtypes = [weight_only_quint8_dtype_config, weight_only_quint4x2_dtype_config]
    layer_norm_dtypes = [input_output_only_quint8_dtype_config]

    # Register the pattern configs family by family. Each setter's return value is
    # rebound so the result is whatever the final setter returns.
    config = BackendConfig("_native_and_fp16")
    config = config.set_backend_pattern_configs(_get_conv_configs(conv_dtypes))
    config = config.set_backend_pattern_configs(_get_linear_configs(linear_dtypes))
    config = config.set_backend_pattern_configs(_get_binary_op_configs(binary_dtypes))
    # `cat` yields a single pattern config, hence the singular setter.
    config = config.set_backend_pattern_config(_get_cat_config(default_dtypes))
    config = config.set_backend_pattern_configs(_get_default_op_configs(default_dtypes))
    config = config.set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_dtypes))
    config = config.set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_dtypes))
    config = config.set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_dtypes))
    config = config.set_backend_pattern_configs(_get_bn_configs(default_dtypes))
    config = config.set_backend_pattern_configs(_get_ln_configs(layer_norm_dtypes))
    config = config.set_backend_pattern_configs(_get_rnn_op_configs(rnn_dtypes))
    config = config.set_backend_pattern_configs(_get_embedding_op_configs(embedding_dtypes))
    return config
def get_native_backend_config() -> BackendConfig:
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack).
    """
    # TODO: express this BackendConfig as a union of the FBGEMM and QNNPACK BackendConfigs

    # Dtype configs accepted by each family of ops. Every family gets its own list object.
    conv_dtypes = [weighted_op_quint8_dtype_config]
    linear_dtypes = [
        weighted_op_quint8_dtype_config,
        default_dynamic_int8_dtype_config,
        default_dynamic_float16_dtype_config,
    ]
    binary_dtypes = [default_op_quint8_dtype_config]
    default_dtypes = [default_op_quint8_dtype_config]
    fixed_qparams_dtypes = [default_op_quint8_dtype_config]
    share_qparams_dtypes = [default_op_quint8_dtype_config]
    tensor_info_dtypes = [default_op_quint8_dtype_config]
    rnn_dtypes = [default_dynamic_int8_dtype_config, default_dynamic_float16_dtype_config]
    embedding_dtypes = [weight_only_quint8_dtype_config, weight_only_quint4x2_dtype_config]
    layer_norm_dtypes = [input_output_only_quint8_dtype_config]

    # Register the pattern configs family by family. Each setter's return value is
    # rebound so the result is whatever the final setter returns.
    config = BackendConfig("native")
    config = config.set_backend_pattern_configs(_get_conv_configs(conv_dtypes))
    config = config.set_backend_pattern_configs(_get_linear_configs(linear_dtypes))
    config = config.set_backend_pattern_configs(_get_binary_op_configs(binary_dtypes))
    # `cat` yields a single pattern config, hence the singular setter.
    config = config.set_backend_pattern_config(_get_cat_config(default_dtypes))
    config = config.set_backend_pattern_configs(_get_default_op_configs(default_dtypes))
    config = config.set_backend_pattern_configs(_get_fixed_qparams_op_configs(fixed_qparams_dtypes))
    config = config.set_backend_pattern_configs(_get_share_qparams_op_configs(share_qparams_dtypes))
    config = config.set_backend_pattern_configs(_get_tensor_info_op_configs(tensor_info_dtypes))
    config = config.set_backend_pattern_configs(_get_bn_configs(default_dtypes))
    config = config.set_backend_pattern_configs(_get_ln_configs(layer_norm_dtypes))
    config = config.set_backend_pattern_configs(_get_rnn_op_configs(rnn_dtypes))
    config = config.set_backend_pattern_configs(_get_embedding_op_configs(embedding_dtypes))
    return config
def get_native_backend_config_dict():
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) in dictionary form.
    """
    # Build the native config, then hand back its `to_dict()` representation.
    native_config = get_native_backend_config()
    return native_config.to_dict()
def get_test_only_legacy_native_backend_config_dict():
    """
    Return the `BackendConfig` for PyTorch Native backend (fbgemm/qnnpack) with various additional
    fp16 ops in dictionary form.
    """
    # Build the legacy test-only config, then hand back its `to_dict()` representation.
    legacy_config = get_test_only_legacy_native_backend_config()
    return legacy_config.to_dict()
|