Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 39 additions & 2 deletions backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
SharedQspecQuantizer,
)
from executorch.backends.arm.vgf import VgfCompileSpec
from executorch.exir._warnings import experimental
from torch.fx import GraphModule, Node
from torchao.quantization.pt2e import (
FakeQuantize,
Expand Down Expand Up @@ -441,14 +442,26 @@ def _for_each_filtered_node(


class TOSAQuantizer(Quantizer):
"""Manage quantization annotations for TOSA-compatible backends."""
"""Manage quantization annotations for TOSA-compatible backends.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.

"""

def __init__(
self,
compile_spec_or_tosa_spec,
use_composable_quantizer: bool = False,
) -> None:
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec."""
"""Create a TOSA quantizer from a TOSA spec or Arm compile spec.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental
API surface that may change without notice.

"""
self.use_composable_quantizer = use_composable_quantizer
self.quantizer: _TOSAQuantizerV1 | _TOSAQuantizerV2
if use_composable_quantizer:
Expand Down Expand Up @@ -606,6 +619,10 @@ def set_io(
self.quantizer.set_io(quantization_config)
return self

@experimental(
"This API is experimental and may change without notice. "
"It is only available when use_composable_quantizer=True."
)
def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
"""Insert a quantizer with highest precedence."""
if self.use_composable_quantizer:
Expand All @@ -614,6 +631,10 @@ def add_quantizer(self, quantizer: Quantizer) -> TOSAQuantizer:
"add_quantizer is only supported in the composable quantizer implementation."
)

@experimental(
"This API is experimental and may change without notice. "
"It is only available when use_composable_quantizer=True."
)
def set_node_finder(
self, quantization_config: Optional[QuantizationConfig], node_finder: NodeFinder
) -> TOSAQuantizer:
Expand All @@ -631,6 +652,10 @@ def set_node_finder(
"set_node_finder is only supported in the composable quantizer implementation."
)

@experimental(
"This API is experimental and may change without notice. "
"It is only available when use_composable_quantizer=True."
)
def set_node_target(
self, node_target: OpOverload, quantization_config: Optional[QuantizationConfig]
) -> TOSAQuantizer:
Expand All @@ -641,6 +666,10 @@ def set_node_target(
"set_node_target is only supported in the composable quantizer implementation."
)

@experimental(
"This API is experimental and may change without notice. "
"It is only available when use_composable_quantizer=True."
)
def set_node_name(
self, node_name: str, quantization_config: Optional[QuantizationConfig]
) -> TOSAQuantizer:
Expand Down Expand Up @@ -1167,6 +1196,10 @@ def set_io(
class EthosUQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Ethos-U backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.

Args:
compile_spec (EthosUCompileSpec): Backend compile specification for
Ethos-U targets.
Expand All @@ -1185,6 +1218,10 @@ def __init__(
class VgfQuantizer(TOSAQuantizer):
"""Quantizer supported by the Arm Vgf backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.

Args:
compile_spec (VgfCompileSpec): Backend compile specification for Vgf
targets.
Expand Down
62 changes: 48 additions & 14 deletions docs/source/backends/arm-ethos-u/arm-ethos-u-quantization.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,23 @@ The Arm Ethos-U delegate supports the following quantization schemes:
### Quantization API

```python
class EthosUQuantizer(compile_spec: 'EthosUCompileSpec') -> 'None'
class EthosUQuantizer(compile_spec: 'EthosUCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
```
Quantizer supported by the Arm Ethos-U backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.

Args:
- **compile_spec (EthosUCompileSpec)**: Backend compile specification for
Ethos-U targets.
- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.

```python
def EthosUQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
```
Insert a quantizer with highest precedence.

```python
def EthosUQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
Expand All @@ -48,22 +58,24 @@ Returns:
- **GraphModule**: The quantized model.

```python
def EthosUQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
def EthosUQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for submodules not matched by other filters.

Args:
- **quantization_config (QuantizationConfig)**: Configuration to apply to
modules that are not captured by name or type filters.
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
apply to modules that are not captured by name or type filters.
``None`` indicates no quantization.

```python
def EthosUQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
def EthosUQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for input and output nodes.

Args:
- **quantization_config (QuantizationConfig)**: Configuration describing
activation quantization for model inputs and outputs.
- **quantization_config (Optional[QuantizationConfig])**: Configuration
describing activation quantization for model inputs and outputs.
``None`` indicates no quantization.

```python
def EthosUQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
Expand All @@ -75,29 +87,51 @@ patterns for that submodule with the provided quantization_config.

Args:
- **module_name (str)**: Fully qualified module name to configure.
- **quantization_config (QuantizationConfig)**: Configuration applied to
the named submodule.
- **quantization_config (Optional[QuantizationConfig])**: Configuration
applied to the named submodule. ``None`` indicates no
quantization.

```python
def EthosUQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for submodules with a given module type.

For example, calling set_module_type(Sub) quantizes supported patterns
in each Sub instance with the provided quantization_config.
For example, calling set_module_type(Softmax) quantizes supported
patterns in each Softmax instance with the provided quantization_config.

Args:
- **module_type (Callable)**: Type whose submodules should use the
provided quantization configuration.
- **quantization_config (QuantizationConfig)**: Configuration to apply to
submodules of the given type.
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
apply to submodules of the given type. ``None`` indicates no
quantization.

```python
def EthosUQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
```
Set quantization_config for nodes matched by a custom NodeFinder.

Args:
- **quantization_config (Optional[QuantizationConfig])**: Configuration
describing quantization settings for nodes matched by the provided
NodeFinder. ``None`` indicates no quantization.

```python
def EthosUQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization config for a specific node name.

```python
def EthosUQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization config for a specific operator target.

```python
def EthosUQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
```
Transform the graph to prepare it for quantization annotation.

Currently transforms scalar values to tensor attributes.
Decomposes all operators where required to get correct quantization parameters.

Args:
- **model (GraphModule)**: Model whose graph will be transformed.
Expand Down
62 changes: 48 additions & 14 deletions docs/source/backends/arm-vgf/arm-vgf-quantization.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,23 @@ setting using the `set_module_name` or `set_module_type` methods.
### Quantization API

```python
class VgfQuantizer(compile_spec: 'VgfCompileSpec') -> 'None'
class VgfQuantizer(compile_spec: 'VgfCompileSpec', use_composable_quantizer: 'bool' = False) -> 'None'
```
Quantizer supported by the Arm Vgf backend.

.. warning::
Setting ``use_composable_quantizer=True`` enables an experimental API
surface that may change without notice.

Args:
- **compile_spec (VgfCompileSpec)**: Backend compile specification for Vgf
targets.
- **use_composable_quantizer (bool)**: Whether to use the composable quantizer implementation. See https://github.com/pytorch/executorch/issues/17701 for details.

```python
def VgfQuantizer.add_quantizer(self, quantizer: 'Quantizer') -> 'TOSAQuantizer':
```
Insert a quantizer with highest precedence.

```python
def VgfQuantizer.quantize_with_submodules(self, model: 'GraphModule', calibration_samples: 'list[tuple]', is_qat: 'bool' = False, fold_quantize: 'bool' = True):
Expand All @@ -67,22 +77,24 @@ Returns:
- **GraphModule**: The quantized model.

```python
def VgfQuantizer.set_global(self, quantization_config: 'QuantizationConfig | None') -> 'TOSAQuantizer':
def VgfQuantizer.set_global(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for submodules not matched by other filters.

Args:
- **quantization_config (QuantizationConfig)**: Configuration to apply to
modules that are not captured by name or type filters.
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
apply to modules that are not captured by name or type filters.
``None`` indicates no quantization.

```python
def VgfQuantizer.set_io(self, quantization_config: 'QuantizationConfig') -> 'TOSAQuantizer':
def VgfQuantizer.set_io(self, quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for input and output nodes.

Args:
- **quantization_config (QuantizationConfig)**: Configuration describing
activation quantization for model inputs and outputs.
- **quantization_config (Optional[QuantizationConfig])**: Configuration
describing activation quantization for model inputs and outputs.
``None`` indicates no quantization.

```python
def VgfQuantizer.set_module_name(self, module_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
Expand All @@ -94,29 +106,51 @@ patterns for that submodule with the provided quantization_config.

Args:
- **module_name (str)**: Fully qualified module name to configure.
- **quantization_config (QuantizationConfig)**: Configuration applied to
the named submodule.
- **quantization_config (Optional[QuantizationConfig])**: Configuration
applied to the named submodule. ``None`` indicates no
quantization.

```python
def VgfQuantizer.set_module_type(self, module_type: 'Callable', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization_config for submodules with a given module type.

For example, calling set_module_type(Sub) quantizes supported patterns
in each Sub instance with the provided quantization_config.
For example, calling set_module_type(Softmax) quantizes supported
patterns in each Softmax instance with the provided quantization_config.

Args:
- **module_type (Callable)**: Type whose submodules should use the
provided quantization configuration.
- **quantization_config (QuantizationConfig)**: Configuration to apply to
submodules of the given type.
- **quantization_config (Optional[QuantizationConfig])**: Configuration to
apply to submodules of the given type. ``None`` indicates no
quantization.

```python
def VgfQuantizer.set_node_finder(self, quantization_config: 'Optional[QuantizationConfig]', node_finder: 'NodeFinder') -> 'TOSAQuantizer':
```
Set quantization_config for nodes matched by a custom NodeFinder.

Args:
- **quantization_config (Optional[QuantizationConfig])**: Configuration
describing quantization settings for nodes matched by the provided
NodeFinder. ``None`` indicates no quantization.

```python
def VgfQuantizer.set_node_name(self, node_name: 'str', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization config for a specific node name.

```python
def VgfQuantizer.set_node_target(self, node_target: 'OpOverload', quantization_config: 'Optional[QuantizationConfig]') -> 'TOSAQuantizer':
```
Set quantization config for a specific operator target.

```python
def VgfQuantizer.transform_for_annotation(self, model: 'GraphModule') -> 'GraphModule':
```
Transform the graph to prepare it for quantization annotation.

Currently transforms scalar values to tensor attributes.
Decomposes all operators where required to get correct quantization parameters.

Args:
- **model (GraphModule)**: Model whose graph will be transformed.
Expand Down
Loading