diff --git a/backends/arm/quantizer/arm_quantizer.py b/backends/arm/quantizer/arm_quantizer.py
index e33555ced7c..a09fa68ab0e 100644
--- a/backends/arm/quantizer/arm_quantizer.py
+++ b/backends/arm/quantizer/arm_quantizer.py
@@ -71,6 +71,7 @@
     SharedQspecQuantizer,
 )
 from executorch.backends.arm.vgf import VgfCompileSpec
+from executorch.exir._warnings import experimental
 from torch.fx import GraphModule, Node
 from torchao.quantization.pt2e import (
     FakeQuantize,
@@ -441,14 +442,26 @@ def _for_each_filtered_node(
 
 
 class TOSAQuantizer(Quantizer):
-    """Manage quantization annotations for TOSA-compatible backends."""
+    """Manage quantization annotations for TOSA-compatible backends.
+
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
+    """
 
     def __init__(
         self,
         compile_spec_or_tosa_spec,
         use_composable_quantizer: bool = False,
     ) -> None:
-        """Create a TOSA quantizer from a TOSA spec or Arm compile spec."""
+        """Create a TOSA quantizer from a TOSA spec or Arm compile spec.
+
+        .. warning::
+            Setting ``use_composable_quantizer=True`` enables an experimental
+            API surface that may change without notice.
+
+        """
         self.use_composable_quantizer = use_composable_quantizer
         self.quantizer: _TOSAQuantizerV1 | _TOSAQuantizerV2
         if use_composable_quantizer:
@@ -606,6 +619,10 @@ def set_io(
             self.quantizer.set_io(quantization_config)
         return self
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
         """Insert a quantizer with highest precedence."""
         if self.use_composable_quantizer:
@@ -614,6 +631,10 @@ def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
             "add_quantizer is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_finder(
         self, quantization_config: Optional[QuantizationConfig], node_finder: NodeFinder
     ) -> TOSAQuantizer:
@@ -631,6 +652,10 @@ def set_node_finder(
             "set_node_finder is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_target(
         self, node_target: OpOverload, quantization_config: Optional[QuantizationConfig]
     ) -> TOSAQuantizer:
@@ -641,6 +666,10 @@ def set_node_target(
             "set_node_target is only supported in the composable quantizer implementation."
         )
 
+    @experimental(
+        "This API is experimental and may change without notice. "
+        "It is only available when use_composable_quantizer=True."
+    )
     def set_node_name(
         self, node_name: str, quantization_config: Optional[QuantizationConfig]
     ) -> TOSAQuantizer:
@@ -1167,6 +1196,10 @@ def set_io(
 class EthosUQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Ethos-U backend.
 
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
     Args:
         compile_spec (EthosUCompileSpec): Backend compile specification for
             Ethos-U targets.
@@ -1185,6 +1218,10 @@ def __init__(
 class VgfQuantizer(TOSAQuantizer):
     """Quantizer supported by the Arm Vgf backend.
 
+    .. warning::
+        Setting ``use_composable_quantizer=True`` enables an experimental API
+        surface that may change without notice.
+
     Args:
         compile_spec (VgfCompileSpec): Backend compile specification for Vgf
             targets.
diff --git a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
index 190ea3d9032..b107827b283 100644
--- a/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
+++ b/docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
@@ -16,13 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes:
 
 ### Quantization API
 
 ```python
-class EthosUQuantizer(compile_spec: 'EthosUCompileSpec') -> 'None'
+class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
 ```
 Quantizer supported by the Arm Ethos-U backend.
 
+.. warning::
+    Setting ``use_composable_quantizer=True`` enables an experimental API
+    surface that may change without notice.
+
 Args:
 - **compile_spec (EthosUCompileSpec)**: Backend compile specification for Ethos-U targets.
+- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.
+
+```python
+def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
+```
+Insert a quantizer with highest precedence.
 
 ```python
 def EthosUQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
@@ -48,22 +58,24 @@ Returns:
 - **GraphModule**: The quantized model.
 
 ```python
-def EthosUQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
+def EthosUQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for submodules not matched by other filters.
 
 Args:
-- **quantization_config (QuantizationConfig)**: Configuration to apply to
-  modules that are not captured by name or type filters.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration to
+  apply to modules that are not captured by name or type filters.
+  ``None`` indicates no quantization.
 
 ```python
-def EthosUQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
+def EthosUQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for input and output nodes.
 
 Args:
-- **quantization_config (QuantizationConfig)**: Configuration describing
-  activation quantization for model inputs and outputs.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  describing activation quantization for model inputs and outputs.
+  ``None`` indicates no quantization.
 
 ```python
 def EthosUQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
@@ -75,29 +87,51 @@ patterns for that submodule with the provided quantization_config.
 
 Args:
 - **module_name (str)**: Fully qualified module name to configure.
-- **quantization_config (QuantizationConfig)**: Configuration applied to
-  the named submodule.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  applied to the named submodule. ``None`` indicates no
+  quantization.
 
 ```python
 def EthosUQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for submodules with a given module type.
 
-For example, calling set_module_type(Sub) quantizes supported patterns
-in each Sub instance with the provided quantization_config.
+For example, calling set_module_type(Softmax) quantizes supported
+patterns in each Softmax instance with the provided quantization_config.
 
 Args:
 - **module_type (Callable)**: Type whose submodules should use the provided
   quantization configuration.
-- **quantization_config (QuantizationConfig)**: Configuration to apply to
-  submodules of the given type.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration to
+  apply to submodules of the given type. ``None`` indicates no
+  quantization.
+
+```python
+def EthosUQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
+```
+Set quantization_config for nodes matched by a custom NodeFinder.
+
+Args:
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  describing quantization settings for nodes matched by the provided
+  NodeFinder. ``None`` indicates no quantization.
+
+```python
+def EthosUQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
+```
+Set quantization config for a specific node name.
+
+```python
+def EthosUQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
+```
+Set quantization config for a specific operator target.
 
 ```python
 def EthosUQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
 ```
 Transform the graph to prepare it for quantization annotation.
 
-Currently transforms scalar values to tensor attributes.
+Decomposes all operators where required to get correct quantization parameters.
 
 Args:
 - **model (GraphModule)**: Model whose graph will be transformed.
diff --git a/docs/source/backends/arm-vgf/arm-vgf-quantization.md b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
index cddcc0ae9b3..0beb755e3f4 100644
--- a/docs/source/backends/arm-vgf/arm-vgf-quantization.md
+++ b/docs/source/backends/arm-vgf/arm-vgf-quantization.md
@@ -35,13 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods.
 
 ### Quantization API
 
 ```python
-class VgfQuantizer(compile_spec: 'VgfCompileSpec') -> 'None'
+class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
 ```
 Quantizer supported by the Arm Vgf backend.
 
+.. warning::
+    Setting ``use_composable_quantizer=True`` enables an experimental API
+    surface that may change without notice.
+
 Args:
 - **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf targets.
+- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.
+
+```python
+def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
+```
+Insert a quantizer with highest precedence.
 
 ```python
 def VgfQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
@@ -67,22 +77,24 @@ Returns:
 - **GraphModule**: The quantized model.
 
 ```python
-def VgfQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
+def VgfQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for submodules not matched by other filters.
 
 Args:
-- **quantization_config (QuantizationConfig)**: Configuration to apply to
-  modules that are not captured by name or type filters.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration to
+  apply to modules that are not captured by name or type filters.
+  ``None`` indicates no quantization.
 
 ```python
-def VgfQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
+def VgfQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for input and output nodes.
 
 Args:
-- **quantization_config (QuantizationConfig)**: Configuration describing
-  activation quantization for model inputs and outputs.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  describing activation quantization for model inputs and outputs.
+  ``None`` indicates no quantization.
 
 ```python
 def VgfQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
@@ -94,29 +106,51 @@ patterns for that submodule with the provided quantization_config.
 
 Args:
 - **module_name (str)**: Fully qualified module name to configure.
-- **quantization_config (QuantizationConfig)**: Configuration applied to
-  the named submodule.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  applied to the named submodule. ``None`` indicates no
+  quantization.
 
 ```python
 def VgfQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
 ```
 Set quantization_config for submodules with a given module type.
 
-For example, calling set_module_type(Sub) quantizes supported patterns
-in each Sub instance with the provided quantization_config.
+For example, calling set_module_type(Softmax) quantizes supported
+patterns in each Softmax instance with the provided quantization_config.
 
 Args:
 - **module_type (Callable)**: Type whose submodules should use the provided
   quantization configuration.
-- **quantization_config (QuantizationConfig)**: Configuration to apply to
-  submodules of the given type.
+- **quantization_config (Optional[QuantizationConfig])**: Configuration to
+  apply to submodules of the given type. ``None`` indicates no
+  quantization.
+
+```python
+def VgfQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
+```
+Set quantization_config for nodes matched by a custom NodeFinder.
+
+Args:
+- **quantization_config (Optional[QuantizationConfig])**: Configuration
+  describing quantization settings for nodes matched by the provided
+  NodeFinder. ``None`` indicates no quantization.
+
+```python
+def VgfQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
+```
+Set quantization config for a specific node name.
+
+```python
+def VgfQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
+```
+Set quantization config for a specific operator target.
 
 ```python
 def VgfQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
 ```
 Transform the graph to prepare it for quantization annotation.
 
-Currently transforms scalar values to tensor attributes.
+Decomposes all operators where required to get correct quantization parameters.
 
 Args:
 - **model (GraphModule)**: Model whose graph will be transformed.
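
Example usage of the new `use_composable_quantizer` flag, end to end. This is an illustrative sketch only: the toy model, the `ethos-u55-128` target and system/memory settings, and the single calibration tensor are placeholders, and the exact import paths and export entry point may differ between ExecuTorch/PyTorch versions. The same pattern applies to `VgfQuantizer` with a `VgfCompileSpec`.

```python
import torch

# Import paths follow the current ExecuTorch layout; adjust if your version
# exposes these symbols elsewhere.
from executorch.backends.arm.ethosu import EthosUCompileSpec
from executorch.backends.arm.quantizer.arm_quantizer import (
    EthosUQuantizer,
    get_symmetric_quantization_config,
)
from torchao.quantization.pt2e.quantize_pt2e import convert_pt2e, prepare_pt2e


class SmallModel(torch.nn.Module):
    """Toy stand-in for a real network."""

    def __init__(self) -> None:
        super().__init__()
        self.linear = torch.nn.Linear(16, 8)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return torch.nn.functional.relu(self.linear(x))


model = SmallModel().eval()
example_inputs = (torch.randn(1, 16),)

# Placeholder target settings; pick the compile spec matching your device.
compile_spec = EthosUCompileSpec(
    "ethos-u55-128",
    system_config="Ethos_U55_High_End_Embedded",
    memory_mode="Shared_Sram",
)

# Opting in to the composable implementation unlocks the experimental
# per-node methods (add_quantizer, set_node_target, set_node_name,
# set_node_finder); with the default False they raise
# "only supported in the composable quantizer implementation".
quantizer = EthosUQuantizer(compile_spec, use_composable_quantizer=True)
quantizer.set_global(get_symmetric_quantization_config())
# Experimental: override the configuration for one operator target.
quantizer.set_node_target(
    torch.ops.aten.linear.default,
    get_symmetric_quantization_config(is_per_channel=False),
)

# Standard PT2E flow: export, annotate, calibrate, convert. Depending on the
# PyTorch version, torch.export.export may replace export_for_training.
exported = torch.export.export_for_training(model, example_inputs).module()
prepared = prepare_pt2e(exported, quantizer)
prepared(*example_inputs)  # calibration pass
quantized = convert_pt2e(prepared)
```

Constructing the quantizer without the flag keeps today's behaviour unchanged; only the warning-annotated methods above depend on the composable path.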