From afd8367465978c7c42099ab8c59dc2b5e713632d Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 6 Mar 2024 18:37:44 +0530 Subject: [PATCH 01/22] add pipeline --- optimum/amd/ryzenai/__init__.py | 4 ++-- optimum/amd/ryzenai/modeling.py | 4 ++++ optimum/amd/ryzenai/models/__init__.py | 2 ++ optimum/amd/ryzenai/pipelines/__init__.py | 24 +++++++++++++++++++++-- tests/ryzenai/test_modeling.py | 6 +++--- 5 files changed, 33 insertions(+), 7 deletions(-) diff --git a/optimum/amd/ryzenai/__init__.py b/optimum/amd/ryzenai/__init__.py index 79e98198..a2b8c3a7 100644 --- a/optimum/amd/ryzenai/__init__.py +++ b/optimum/amd/ryzenai/__init__.py @@ -12,7 +12,7 @@ "RyzenAIModel", "RyzenAIModelForCustomTasks", "RyzenAIModelForImageClassification", - "RyzenAIModelForImageSegmentation", + "RyzenAIModelForSemanticSegmentation", "RyzenAIModelForImageToImage", "RyzenAIModelForObjectDetection", ], @@ -29,9 +29,9 @@ RyzenAIModel, RyzenAIModelForCustomTasks, RyzenAIModelForImageClassification, - RyzenAIModelForImageSegmentation, RyzenAIModelForImageToImage, RyzenAIModelForObjectDetection, + RyzenAIModelForSemanticSegmentation, ) from .pipelines import pipeline from .quantization import RyzenAIOnnxQuantizer diff --git a/optimum/amd/ryzenai/modeling.py b/optimum/amd/ryzenai/modeling.py index 9f2e8225..1a7e263e 100644 --- a/optimum/amd/ryzenai/modeling.py +++ b/optimum/amd/ryzenai/modeling.py @@ -643,5 +643,9 @@ class RyzenAIModelForImageSegmentation(RyzenAIModelForObjectDetection): pass +class RyzenAIModelForSemanticSegmentation(RyzenAIModelForObjectDetection): + pass + + class RyzenAIModelForImageToImage(RyzenAIModelForObjectDetection): pass diff --git a/optimum/amd/ryzenai/models/__init__.py b/optimum/amd/ryzenai/models/__init__.py index 21c4210b..bc719b07 100644 --- a/optimum/amd/ryzenai/models/__init__.py +++ b/optimum/amd/ryzenai/models/__init__.py @@ -11,11 +11,13 @@ "yolov5": ["YoloV5ImageProcessor"], "yolov8": ["YoloV8ImageProcessor"], "yolox": ["YoloXImageProcessor"], + "semanticfpn": ["SemanticFPNImageProcessor"], } # Direct imports for type-checking if TYPE_CHECKING: + from .semanticfpn import SemanticFPNImageProcessor from .yolov3 import YoloV3ImageProcessor from .yolov5 import YoloV5ImageProcessor from .yolov8 import YoloV3ImageProcessor diff --git a/optimum/amd/ryzenai/pipelines/__init__.py b/optimum/amd/ryzenai/pipelines/__init__.py index 608c7334..f4231e21 100644 --- a/optimum/amd/ryzenai/pipelines/__init__.py +++ b/optimum/amd/ryzenai/pipelines/__init__.py @@ -10,9 +10,21 @@ from transformers.image_processing_utils import BaseImageProcessor from transformers.onnx.utils import get_preprocessor -from ..modeling import RyzenAIModel, RyzenAIModelForImageClassification, RyzenAIModelForObjectDetection -from ..models import YoloV3ImageProcessor, YoloV5ImageProcessor, YoloV8ImageProcessor, YoloXImageProcessor +from ..modeling import ( + RyzenAIModel, + RyzenAIModelForImageClassification, + RyzenAIModelForObjectDetection, + RyzenAIModelForSemanticSegmentation, +) +from ..models import ( + SemanticFPNImageProcessor, + YoloV3ImageProcessor, + YoloV5ImageProcessor, + YoloV8ImageProcessor, + YoloXImageProcessor, +) from .image_classification import TimmImageClassificationPipeline +from .image_segmentation import ImageSegmentationPipeline from .object_detection import YoloObjectDetectionPipeline @@ -24,6 +36,7 @@ "yolov5": {"preprocessor": YoloV5ImageProcessor, "impl": YoloObjectDetectionPipeline}, "yolov3": {"preprocessor": YoloV3ImageProcessor, "impl": YoloObjectDetectionPipeline}, "yolov8": {"preprocessor": YoloV8ImageProcessor, "impl": YoloObjectDetectionPipeline}, + "semantic_fpn": {"preprocessor": SemanticFPNImageProcessor, "impl": ImageSegmentationPipeline}, } RYZENAI_SUPPORTED_TASKS = { @@ -40,6 +53,13 @@ "type": "image", "model_type": "yolox", }, + "image-segmentation": { + "impl": ImageSegmentationPipeline, + "class": (RyzenAIModelForSemanticSegmentation,), + "default": "amd/SemanticFPN", + "type": "image", + "model_type": "semantic_fpn", + }, } diff --git a/tests/ryzenai/test_modeling.py b/tests/ryzenai/test_modeling.py index 912105da..4eb71440 100644 --- a/tests/ryzenai/test_modeling.py +++ b/tests/ryzenai/test_modeling.py @@ -29,9 +29,9 @@ RyzenAIModel, RyzenAIModelForCustomTasks, RyzenAIModelForImageClassification, - RyzenAIModelForImageSegmentation, RyzenAIModelForImageToImage, RyzenAIModelForObjectDetection, + RyzenAIModelForSemanticSegmentation, pipeline, ) from optimum.utils import ( @@ -206,7 +206,7 @@ def test_pipeline_model_is_none(self): self.assertGreaterEqual(pred["score"], 0.0) -class RyzenAIModelForImageSegmentationIntegrationTest(unittest.TestCase, RyzenAITestCaseMixin): +class RyzenAIModelForSemanticSegmentationIntegrationTest(unittest.TestCase, RyzenAITestCaseMixin): @parameterized.expand(RYZEN_PREQUANTIZED_MODEL_IMAGE_SEGMENTATION) @pytest.mark.prequantized_model_test def test_model(self, model_id): @@ -217,7 +217,7 @@ def test_model(self, model_id): vaip_config = DEFAULT_VAIP_CONFIG outputs_ipu, outputs_cpu = self.prepare_outputs( - model_id, RyzenAIModelForImageSegmentation, ort_input, vaip_config, cache_dir, cache_key, file_name + model_id, RyzenAIModelForSemanticSegmentation, ort_input, vaip_config, cache_dir, cache_key, file_name ) for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): From 3afe2bcdf9b82da5fba08dec7530c33ac6524a10 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 8 Mar 2024 19:42:05 +0530 Subject: [PATCH 02/22] add options --- optimum/amd/ryzenai/__init__.py | 2 +- optimum/amd/ryzenai/configuration.py | 427 ++++++++++++++++++++++++--- optimum/amd/ryzenai/quantization.py | 12 +- 3 files changed, 385 insertions(+), 56 deletions(-) diff --git a/optimum/amd/ryzenai/__init__.py b/optimum/amd/ryzenai/__init__.py index a2b8c3a7..1759411c 100644 --- a/optimum/amd/ryzenai/__init__.py +++ b/optimum/amd/ryzenai/__init__.py @@ -7,7 +7,7 @@ _import_structure = { - "configuration": ["RyzenAIConfig", "QuantizationConfig", "AutoQuantizationConfig"], + "configuration": ["RyzenAIConfig", "ExtraOptions", "QuantizationConfig", "AutoQuantizationConfig"], "modeling": [ "RyzenAIModel", "RyzenAIModelForCustomTasks", diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index c043d473..98040877 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -5,13 +5,238 @@ from dataclasses import asdict, dataclass from enum import Enum from typing import Optional - +import re import vai_q_onnx -from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType +from onnxruntime.quantization import CalibrationMethod from optimum.configuration_utils import BaseConfig +from dataclasses import dataclass, field, fields +from typing import List, Tuple, Dict, Union + + +class CalibrationMethod(Enum): + MinMax = vai_q_onnx.CalibrationMethod.MinMax + Entropy = vai_q_onnx.CalibrationMethod.Entropy + Percentile = vai_q_onnx.CalibrationMethod.Percentile + NonOverflow = vai_q_onnx.PowerOfTwoMethod.NonOverflow + MinMSE = vai_q_onnx.PowerOfTwoMethod.MinMSE + + +class QuantFormat(Enum): + QOperator = vai_q_onnx.QuantFormat.QOperator + QDQ = vai_q_onnx.QuantFormat.QDQ + VitisQuantFormat_QDQ = vai_q_onnx.VitisQuantFormat.QDQ + VitisQuantFormat_FixNeuron = vai_q_onnx.VitisQuantFormat.FixNeuron + + +class QuantType(Enum): + QInt8 = vai_q_onnx.QuantType.QInt8 + QUInt8 = vai_q_onnx.QuantType.QUInt8 +@dataclass +class ExtraOptions: + """ + ExtraOptions is a dataclass handling additional options for quantization. + + Args: + activation_symmetric (`bool`, defaults to `False`): + If True, symmetrize calibration data for activations. + weight_symmetric (`bool`, defaults to `True`): + If True, symmetrize calibration data for weights. + use_unsigned_relu (`bool`, defaults to `False`): + If True, the output tensor of ReLU and Clip, whose min is 0, will be forced to be asymmetric. + quantize_bias (`bool`, defaults to `True`): + If True, quantize the Bias as normal weights. + remove_input_init (`bool`, defaults to `True`): + If True, initializer in graph inputs will be removed because it will not be treated as a constant value/weight. + This may prevent some of the graph optimizations, like const folding. + enable_subgraph (`bool`, defaults to `False`): + If True, the subgraph will be quantized. More support for this feature is planned in the future. + force_quantize_no_input_check (`bool`, defaults to `False`): + If True, latent operators such as maxpool and transpose will always quantize their inputs, generating quantized + outputs even if their inputs have not been quantized. + matmul_const_b_only (`bool`, defaults to `False`): + If True, only MatMul operations with a constant 'B' will be quantized. + add_qdq_pair_to_weight (`bool`, defaults to `False`): + If True, both QuantizeLinear and DeQuantizeLinear nodes are inserted for weight, maintaining its floating-point format. + In the PowerOfTwoMethod calibration method, this setting will also be effective for the bias. + op_types_to_exclude_output_quantization (`List[str] or None`, defaults to `[]`): + If specified, the output of operators with these types will not be quantized. + dedicated_qdq_pair (`bool`, defaults to `False`): + If True, an identical and dedicated QDQ pair is created for each node, allowing multiple nodes to share a single QDQ pair + as their inputs. + qdq_op_type_per_channel_support_to_axis (`Dict`, defaults to `{}`): + Sets the channel axis for specific operator types (e.g., {'MatMul': 1}). + use_qdq_vitis_custom_ops (`bool`, defaults to `True`): + If True, The UInt8 and Int8 quantization will be executed by the custom operations library, otherwise by the library + of onnxruntime extensions. Only valid in vai_q_onnx.VitisQuantFormat.QDQ. + calib_tensor_range_symmetric (`bool`, defaults to `False`): + If True, the final range of the tensor during calibration will be symmetrically set around the central point "0". + In PowerOfTwoMethod calibration method, the default is True. + calib_moving_average (`bool`, defaults to `False`): + If True, the moving average of the minimum and maximum values will be computed when the calibration method selected is + MinMax. In PowerOfTwoMethod calibration method, this should be set to False. + calib_moving_average_constant (`float`, defaults to `0.01`): + Specifies the constant smoothing factor to use when computing the moving average of the minimum and maximum values. + Only effective when the calibration method selected is MinMax and CalibMovingAverage is set to True. + In PowerOfTwoMethod calibration method, this option is unsupported. + random_data_reader_input_data_range (`Dict or None`, defaults to `None`): + Specifies the data range for each input if used random data reader (calibration_data_reader is None). + int16_scale (`bool`, defaults to `False`): + If True, the float scale will be replaced by the closest value corresponding to M and 2**N, where the range of M and 2**N + is within the representation range of int16 and uint16. + min_mse_mode (`str`, defaults to `'All'`): + When using vai_q_onnx.PowerOfTwoMethod.MinMSE, you can specify the method for calculating minmse. + By default, minmse is calculated using all calibration data. Alternatively, you can set the mode to "MostCommon", + where minmse is calculated for each batch separately and take the most common value. + convert_bn_to_conv (`bool`, defaults to `True`): + If True, the BatchNormalization operation will be converted to Conv operation when enable_dpu is True. + convert_reduce_mean_to_global_avg_pool (`bool`, defaults to `True`): + If True, the Reduce Mean operation will be converted to Global Average Pooling operation when enable_dpu is True. + split_large_kernel_pool (`bool`, defaults to `True`): + If True, the large kernel Global Average Pooling operation will be split into multiple Average Pooling operation when + enable_dpu is True. + convert_split_to_slice (`bool`, defaults to `True`): + If True, the Split operation will be converted to Slice operation when enable_dpu is True. + fuse_instance_norm (`bool`, defaults to `False`): + If True, the split instance norm operation will be fused to InstanceNorm operation when enable_dpu is True. + fuse_l2_norm (`bool`, defaults to `False`): + If True, a set of L2norm operations will be fused to L2Norm operation when enable_dpu is True. + convert_clip_to_relu (`bool`, defaults to `False`): + If True, the Clip operations that have a min value of 0 will be converted to ReLU operations. + simulate_dpu (`bool`, defaults to `True`): + If True, a simulation transformation that replaces some operations with an approximate implementation will be applied + for DPU when enable_dpu is True. + convert_leaky_relu_to_dpu_version (`bool`, defaults to `True`): + If True, the Leaky Relu operation will be converted to DPU version when SimulateDPU is True. + convert_sigmoid_to_hard_sigmoid (`bool`, defaults to `True`): + If True, the Sigmoid operation will be converted to Hard Sigmoid operation when SimulateDPU is True. + convert_hard_sigmoid_to_dpu_version (`bool`, defaults to `True`): + If True, the Hard Sigmoid operation will be converted to DPU version when SimulateDPU is True. + convert_avg_pool_to_dpu_version (`bool`, defaults to `True`): + If True, the global or kernel-based Average Pooling operation will be converted to DPU version when SimulateDPU is True. + convert_reduce_mean_to_dpu_version (`bool`, defaults to `True`): + If True, the ReduceMean operation will be converted to DPU version when SimulateDPU is True. + convert_softmax_to_dpu_version (`bool`, defaults to `False`): + If True, the Softmax operation will be converted to DPU version when SimulateDPU is True. + ipu_limitation_check (`bool`, defaults to `True`): + If True, the quantization scale will be adjusted due to the limitation of DPU/NPU. + adjust_shift_cut (`bool`, defaults to `True`): + If True, adjust the shift cut of nodes when ipu_limitation_check is True. + adjust_shift_bias (`bool`, defaults to `True`): + If True, adjust the shift bias of nodes when ipu_limitation_check is True. + adjust_shift_read (`bool`, defaults to `True`): + If True, adjust the shift read of nodes when ipu_limitation_check is True. + adjust_shift_write (`bool`, defaults to `True`): + If True, adjust the shift write of nodes when ipu_limitation_check is True. + adjust_hard_sigmoid (`bool`, defaults to `True`): + If True, adjust the pos of hard sigmoid nodes when ipu_limitation_check is True. + adjust_shift_swish (`bool`, defaults to `True`): + If True, adjust the shift swish when ipu_limitation_check is True. + align_concat (`bool`, defaults to `True`): + If True, adjust the quantization pos of concat when ipu_limitation_check is True. + align_pool (`bool`, defaults to `True`): + If True, adjust the quantization pos of pooling when ipu_limitation_check is True. + replace_clip6_relu (`bool`, defaults to `False`): + If True, replace Clip(0,6) with Relu in the model. + cle_steps (`int`, defaults to `1`): + Specifies the steps for CrossLayerEqualization execution when include_cle is set to true. When set to -1, + an adaptive CrossLayerEqualization will be conducted. + cle_total_layer_diff_threshold (`float`, defaults to `2e-7`): + Specifies The threshold represents the sum of mean transformations of CrossLayerEqualization transformations across + all layers when utilizing CrossLayerEqualization. + cle_scale_append_bias (`bool`, defaults to `True`): + Whether the bias be included when calculating the scale of the weights. + remove_qdq_conv_leaky_relu (`bool`, defaults to `False`): + If True, the QDQ between Conv and LeakyRelu will be removed for DPU when enable_dpu is True. + remove_qdq_conv_prelu (`bool`, defaults to `False`): + If True, the QDQ between Conv and PRelu will be removed for DPU when enable_dpu is True. + """ + + activation_symmetric: bool = False + weight_symmetric: bool = True + use_unsigned_relu: bool = False + quantize_bias: bool = True + remove_input_init: bool = True + enable_subgraph: bool = False + force_quantize_no_input_check: bool = False + matmul_const_b_only: bool = False + add_qdq_pair_to_weight: bool = False + op_types_to_exclude_output_quantization: Union[List[str], None] = None + dedicated_qdq_pair: bool = False + qdq_op_type_per_channel_support_to_axis: Dict = field(default_factory=lambda: {}) + use_qdq_vitis_custom_ops: bool = True + calib_tensor_range_symmetric: bool = False + calib_moving_average: bool = False + calib_moving_average_constant: float = 0.01 + random_data_reader_input_data_range: Union[Dict, None] = None + int16_scale: bool = False + min_mse_mode: str = "All" + convert_bn_to_conv: bool = True + convert_reduce_mean_to_global_avg_pool: bool = True + split_large_kernel_pool: bool = True + convert_split_to_slice: bool = True + fuse_instance_norm: bool = False + fuse_l2_norm: bool = False + convert_clip_to_relu: bool = False + simulate_dpu: bool = True + convert_leaky_relu_to_dpu_version: bool = True + convert_sigmoid_to_hard_sigmoid: bool = True + convert_hard_sigmoid_to_dpu_version: bool = True + convert_avg_pool_to_dpu_version: bool = True + convert_reduce_mean_to_dpu_version: bool = True + convert_softmax_to_dpu_version: bool = False + ipu_limitation_check: bool = True + adjust_shift_cut: bool = True + adjust_shift_bias: bool = True + adjust_shift_read: bool = True + adjust_shift_write: bool = True + adjust_hard_sigmoid: bool = True + adjust_shift_swish: bool = True + align_concat: bool = True + align_pool: bool = True + replace_clip6_relu: bool = False + cle_steps: int = 1 + cle_total_layer_diff_threshold: float = 2e-7 + cle_scale_append_bias: bool = True + remove_qdq_conv_leaky_relu: bool = False + remove_qdq_conv_prelu: bool = False + + @property + def snake_to_camel(self): + return { + 'qdq_op_type_per_channel_support_to_axis': 'QDQOpTypePerChannelSupportToAxis', + 'ipu_limitation_check': 'IPULimitationCheck', + 'cle_steps': 'CLESteps', + 'cle_total_layer_diff_threshold': 'CLETotalLayerDiffThreshold', + 'cle_scale_append_bias': 'CLEScaleAppendBias' + } + + @property + def camel_to_snake(self): + return {value: key for key, value in self.snake_to_camel.items()} + + def __setattr__(self, name, value): + snake_case_name = self.camel_to_snake.get(name, re.sub(r'([A-Z])', r'_\1', name).lower().lstrip('_')) + + super().__setattr__(snake_case_name, value) + + def __getattr__(self, name): + snake_case_name = self.camel_to_snake.get(name, re.sub(r'([A-Z])', r'_\1', name).lower().lstrip('_')) + return getattr(self, snake_case_name) + + def get_non_default_values(self, camel_case=True) -> dict: + non_default_values = {} + for field in fields(self): + if camel_case: + name = self.snake_to_camel.get(field.name, ''.join(word.capitalize() for word in field.name.split('_'))) + else: + name = field.name + if getattr(self, field.name) != field.default: + non_default_values[name] = getattr(self, field.name) + return non_default_values + @dataclass class QuantizationConfig: """ @@ -20,34 +245,89 @@ class QuantizationConfig: Args: is_static (`bool`): Whether to apply static quantization or dynamic quantization. - format (`QuantFormat`): - Targeted RyzenAI quantization representation format. - For the Operator Oriented (QOperator) format, all the quantized operators have their own ONNX definitions. - For the Tensor Oriented (QDQ) format, the model is quantized by inserting QuantizeLinear / DeQuantizeLinear - operators. - calibration_method (`CalibrationMethod`): - The method chosen to calculate the activations quantization parameters using the calibration dataset. - activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`): - The quantization data types to use for the activations. - activations_symmetric (`bool`, defaults to `False`): - Whether to apply symmetric quantization on the activations. - weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): - The quantization data types to use for the weights. - weights_symmetric (`bool`, defaults to `True`): - Whether to apply symmetric quantization on the weights. - enable_dpu (`bool`, defaults to `True`): - Determines whether to generate a quantized model that is suitable for the DPU. If set to True, the quantization - process will create a model that is optimized for DPU computations. - + format (Union[QuantFormat, str], defaults to `QuantFormat.QDQ`): + This parameter is used to specify the quantization format of the model. + Options: + - `QuantFormat.QOperator`: Quantizes the model directly using quantized operators. + - `QuantFormat.QDQ`: Quantizes the model by inserting QuantizeLinear/DeQuantizeLinear into the tensor. + Supports 8-bit quantization only. + - `QuantFormat.VitisQuantFormat`: Quantizes the model by inserting VitisQuantizeLinear/VitisDequantizeLinear + into the tensor. Supports a wider range of bit-widths and precisions. + - `QuantFormat.FixNeuron` (Experimental): Quantizes the model by inserting FixNeuron (a combination of + QuantizeLinear and DeQuantizeLinear) into the tensor. Experimental and not recommended for deployment. + calibration_method (Union[CalibrationMethod, str], defaults to `CalibrationMethod.MinMSE`): + The method used in calibration. + Options (for CNNs running on NPU, power-of-two methods; for Transformers on NPU or CNNs on CPU, float scale methods): + - `CalibrationMethod.NonOverflow`: Power-of-two method to prevent min/max values from overflowing. + - `CalibrationMethod.MinMSE`: Power-of-two method to minimize mean-square-loss of quantized values and float values. + Longer calibration time but usually better accuracy. + - `CalibrationMethod.MinMax`: Obtain quantization parameters based on minimum and maximum values of each tensor. + - `CalibrationMethod.Entropy`: Determine quantization parameters based on the entropy algorithm of each tensor's distribution. + - `CalibrationMethod.Percentile`: Calculate quantization parameters using percentiles of tensor values. + activations_dtype (QuantType, defaults to `QuantType.QUInt8`): + The quantization data type to use for the activations. + weights_dtype (QuantType, defaults to `QuantType.QInt8`): + The quantization data type to use for the weights. + enable_dpu (bool, defaults to `True`): + Flag to generate a quantized model suitable for DPU/NPU computations. If True, the quantization process will + consider specific limitations and requirements of DPU/NPU, optimizing the model accordingly. + input_nodes (List[str], defaults to an empty list `[]`): + List of names of starting nodes to be quantized. Nodes before these nodes will not be quantized. + output_nodes (List[str], defaults to an empty list `[]`): + List of names of end nodes to be quantized. Nodes after these nodes will not be quantized. + op_types_to_quantize (List[str], defaults to an empty list `[]`): + If specified, only operators of the given types will be quantized (e.g., ['Conv'] to quantize Convolutional layers). + random_data_reader_input_shape (Union[List[int], Tuple[int], Dict[str, List[int]]], defaults to an empty list `[]`): + Shapes of input nodes for internal random data reader. If dynamic axes require specific values, provide shapes. + Format: list/tuple for single input, list of lists for multiple inputs, or dict {name: shape} for named inputs. + per_channel (bool, defaults to `False`): + Determines whether weights should be quantized per channel. Must be False for DPU/NPU devices. + reduce_range (bool, defaults to `False`): + If True, quantizes weights with 7-bits. Must be False for DPU/NPU devices. + activation_type (QuantType, defaults to `QuantType.QInt8`): + Specifies the quantization data type for activations. + weight_type (QuantType, defaults to `QuantType.QInt8`): + Specifies the quantization data type for weights. Must be `QuantType.QInt8` for NPU devices. + nodes_to_quantize (List[str], defaults to an empty list `[]`): + If specified, only the nodes in this list are quantized. + nodes_to_exclude (List[str], defaults to an empty list `[]`): + If specified, nodes in this list will be excluded from quantization. + optimize_model (bool, defaults to `True`): + If True, optimizes the model before quantization. + use_external_data_format (bool, defaults to `False`): + Flag for large size (>2GB) models. If True, model proto and data will be stored in separate files. + execution_providers (List[str], defaults to `['CPUExecutionProvider']`): + Defines the execution providers used by ONNX Runtime for model calibration. + convert_fp16_to_fp32 (bool, defaults to `False`): + Controls whether to convert the input model from float16 to float32 before quantization. + convert_nchw_to_nhwc (bool, defaults to `False`): + Controls whether to convert the input NCHW model to NHWC model before quantization. + include_cle (bool, defaults to `False`): + Flag to optimize models using CrossLayerEqualization; can improve accuracy for some models. + extra_options (Dict or None, defaults to an instance of `ExtraOptions` with default values): + Contains key-value pairs for various options in different cases. """ format: QuantFormat = QuantFormat.QDQ - calibration_method: CalibrationMethod = vai_q_onnx.PowerOfTwoMethod.MinMSE - activations_dtype: QuantType = QuantType.QUInt8 - activations_symmetric: bool = True - weights_dtype: QuantType = QuantType.QInt8 - weights_symmetric: bool = True - enable_dpu: bool = True + calibration_method: Union[CalibrationMethod, str] = CalibrationMethod.MinMSE + input_nodes: List[str] = field(default_factory=list) + output_nodes: List[str] = field(default_factory=list) + op_types_to_quantize: List[str] = field(default_factory=list) + random_data_reader_input_shape: Union[List[int], Tuple[int], Dict[str, List[int]]] = field(default_factory=list) + per_channel: bool = False + reduce_range: bool = False + activation_type: QuantType = QuantType.QInt8 + weight_type: QuantType = QuantType.QInt8 + nodes_to_quantize: List[str] = field(default_factory=list) + nodes_to_exclude: List[str] = field(default_factory=list) + optimize_model: bool = True + use_external_data_format: bool = False + execution_providers: List[str] = field(default_factory=lambda: ["CPUExecutionProvider"]) + enable_dpu: bool = False + convert_fp16_to_fp32: bool = False + convert_nchw_to_nhwc: bool = False + include_cle: bool = False + extra_options: ExtraOptions = field(default_factory=ExtraOptions) @staticmethod def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str: @@ -59,43 +339,92 @@ def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType @property def use_symmetric_calibration(self) -> bool: - return self.activations_symmetric and self.weights_symmetric + return self.extra_options.activation_symmetric and self.extra_options.weight_symmetric - def __str__(self): - return ( - f"{self.format} (" - f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, " - f"enable_dpu: {self.enable_dpu})" - ) + # def __str__(self): + # return ( + # f"{self.format} (" + # f"schema: {QuantizationConfig.quantization_type_str(self.activation_type, self.weight_type)}, " + # f"enable_dpu: {self.enable_dpu})" + # ) class AutoQuantizationConfig: @staticmethod - def ipu_cnn_config(): + def ipu_cnn_config( + calibrate_method: CalibrationMethod = CalibrationMethod.MinMSE, + nodes_to_quantize: Optional[List[str]] = None, + nodes_to_exclude: Optional[List[str]] = None, + op_types_to_quantize: Optional[List[str]] = None, + extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, + ): + if isinstance(extra_options, ExtraOptions): + extra_options_dict = extra_options.__dict__ + else: + extra_options_dict = extra_options or {} + + extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) + return QuantizationConfig( format=QuantFormat.QDQ, - calibration_method=vai_q_onnx.PowerOfTwoMethod.MinMSE, - activations_dtype=QuantType.QUInt8, - activations_symmetric=True, - weights_dtype=QuantType.QInt8, - weights_symmetric=True, + calibration_method=calibrate_method, + activation_type=QuantType.QUInt8, + weight_type=QuantType.QInt8, enable_dpu=True, + op_types_to_quantize=op_types_to_quantize, + nodes_to_quantize=nodes_to_quantize or [], + nodes_to_exclude=nodes_to_exclude or [], + extra_options=ExtraOptions(**extra_options_dict), + ) + + @staticmethod + def ipu_transformer_config( + calibrate_method: CalibrationMethod = CalibrationMethod.MinMax, + nodes_to_quantize: Optional[List[str]] = None, + nodes_to_exclude: Optional[List[str]] = None, + op_types_to_quantize: Optional[List[str]] = None, + extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, + ): + if isinstance(extra_options, ExtraOptions): + extra_options_dict = extra_options.__dict__ + else: + extra_options_dict = extra_options or {} + + extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) + + return QuantizationConfig( + format=QuantFormat.QDQ, + calibration_method=calibrate_method, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + op_types_to_quantize=op_types_to_quantize, + nodes_to_quantize=nodes_to_quantize or [], + nodes_to_exclude=nodes_to_exclude or [], + extra_options=ExtraOptions(**extra_options_dict), ) @staticmethod def cpu_cnn_config( - use_symmetric_activations: bool = False, - use_symmetric_weights: bool = True, - enable_dpu: bool = False, + calibrate_method: CalibrationMethod = CalibrationMethod.MinMax, + nodes_to_quantize: Optional[List[str]] = None, + nodes_to_exclude: Optional[List[str]] = None, + op_types_to_quantize: Optional[List[str]] = None, + extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): + if isinstance(extra_options, ExtraOptions): + extra_options_dict = extra_options.__dict__ + else: + extra_options_dict = extra_options or {} + return QuantizationConfig( format=QuantFormat.QDQ, - calibration_method=vai_q_onnx.CalibrationMethod.MinMax, - activations_dtype=QuantType.QUInt8, - activations_symmetric=use_symmetric_activations, - weights_dtype=QuantType.QInt8, - weights_symmetric=use_symmetric_weights, - enable_dpu=enable_dpu, + calibration_method=calibrate_method, + activation_type=QuantType.QUInt8, + weight_type=QuantType.QInt8, + op_types_to_quantize=op_types_to_quantize, + nodes_to_quantize=nodes_to_quantize or [], + nodes_to_exclude=nodes_to_exclude or [], + extra_options=ExtraOptions(**extra_options_dict), ) diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index cc6633ce..45866bd7 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -159,19 +159,19 @@ def quantize( quantized_model_path = save_dir.joinpath(f"{self.onnx_model_path.stem}{suffix}").with_suffix(".onnx") LOGGER.info("Quantizing model...") + quantize_static( model_input=Path(self.onnx_model_path).as_posix(), model_output=quantized_model_path.as_posix(), calibration_data_reader=reader, quant_format=quantization_config.format, calibrate_method=quantization_config.calibration_method, - weight_type=quantization_config.weights_dtype, - activation_type=quantization_config.activations_dtype, + weight_type=quantization_config.weight_type, + activation_type=quantization_config.activation_type, enable_dpu=quantization_config.enable_dpu, - extra_options={ - "WeightSymmetric": quantization_config.weights_symmetric, - "ActivationSymmetric": quantization_config.activations_symmetric, - }, + extra_options=quantization_config.extra_options.get_non_default_values() + if quantization_config.extra_options + else {}, ) LOGGER.info(f"Saved quantized model at: {save_dir}") From ceef198e01703d2c34d087d09a540828f788d10e Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 8 Mar 2024 20:16:20 +0530 Subject: [PATCH 03/22] added diff dict --- optimum/amd/ryzenai/configuration.py | 48 ++++++++++++++++++++++++---- optimum/amd/ryzenai/quantization.py | 2 +- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 98040877..6612b3f0 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -165,7 +165,7 @@ class ExtraOptions: add_qdq_pair_to_weight: bool = False op_types_to_exclude_output_quantization: Union[List[str], None] = None dedicated_qdq_pair: bool = False - qdq_op_type_per_channel_support_to_axis: Dict = field(default_factory=lambda: {}) + qdq_op_type_per_channel_support_to_axis: Dict = field(default_factory=dict) use_qdq_vitis_custom_ops: bool = True calib_tensor_range_symmetric: bool = False calib_moving_average: bool = False @@ -226,14 +226,14 @@ def __getattr__(self, name): snake_case_name = self.camel_to_snake.get(name, re.sub(r'([A-Z])', r'_\1', name).lower().lstrip('_')) return getattr(self, snake_case_name) - def get_non_default_values(self, camel_case=True) -> dict: + def to_diff_dict(self, camel_case=False) -> dict: non_default_values = {} for field in fields(self): if camel_case: name = self.snake_to_camel.get(field.name, ''.join(word.capitalize() for word in field.name.split('_'))) else: name = field.name - if getattr(self, field.name) != field.default: + if getattr(self, field.name) != field.default and getattr(self, field.name) != {}: non_default_values[name] = getattr(self, field.name) return non_default_values @@ -304,7 +304,7 @@ class QuantizationConfig: Controls whether to convert the input NCHW model to NHWC model before quantization. include_cle (bool, defaults to `False`): Flag to optimize models using CrossLayerEqualization; can improve accuracy for some models. - extra_options (Dict or None, defaults to an instance of `ExtraOptions` with default values): + extra_options (Union[Dict, None, ExtraOptions], defaults to an instance of `ExtraOptions` with default values): Contains key-value pairs for various options in different cases. """ @@ -329,6 +329,39 @@ class QuantizationConfig: include_cle: bool = False extra_options: ExtraOptions = field(default_factory=ExtraOptions) + def __post_init__(self): + if isinstance(self.extra_options, dict): + self.extra_options = ExtraOptions(**self.extra_options) + + def __setattr__(self, name, value): + if name == 'extra_options' and isinstance(value, dict): + from pdb import set_trace; set_trace() + setattr(self, 'extra_options', ExtraOptions(**value)) + else: + super().__setattr__(name, value) + + def to_diff_dict(self) -> dict: + non_default_values = {} + for field in fields(self): + if field.name == 'extra_options': + extra_options_dict = getattr(self, field.name).to_diff_dict() + if extra_options_dict: + non_default_values[field.name] = extra_options_dict + else: + value = getattr(self, field.name) + if value != field.default and value not in ({}, []): + if field.name == "execution_providers" and value == ["CPUExecutionProvider"]: + continue + + if isinstance(value, Enum): + value = value.name + elif isinstance(value, list): + value = [elem.name if isinstance(elem, Enum) else elem for elem in value] + + non_default_values[field.name] = value + return non_default_values + + @staticmethod def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str: return ( @@ -339,7 +372,10 @@ def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType @property def use_symmetric_calibration(self) -> bool: - return self.extra_options.activation_symmetric and self.extra_options.weight_symmetric + if self.extra_options: + return self.extra_options.activation_symmetric and self.extra_options.weight_symmetric + + return ExtraOptions().activation_symmetric and ExtraOptions().weight_symmetric # def __str__(self): # return ( @@ -451,7 +487,7 @@ def __init__( ): super().__init__() self.opset = opset - self.quantization = self.dataclass_to_dict(quantization) + self.quantization = quantization.to_diff_dict() if quantization is not None else None self.optimum_version = kwargs.pop("optimum_version", None) @staticmethod diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 45866bd7..3bfa5194 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -169,7 +169,7 @@ def quantize( weight_type=quantization_config.weight_type, activation_type=quantization_config.activation_type, enable_dpu=quantization_config.enable_dpu, - extra_options=quantization_config.extra_options.get_non_default_values() + extra_options=quantization_config.extra_options.to_diff_dict(camel_case=True) if quantization_config.extra_options else {}, ) From 51334f646c52f396b8f117e6d7f509b65f329872 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 12:39:34 +0530 Subject: [PATCH 04/22] added diff dict --- optimum/amd/ryzenai/configuration.py | 39 ++++++++++++++-------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 6612b3f0..867590b3 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -2,16 +2,15 @@ # Licensed under the MIT License. """Configuration classes for quantization with RyzenAI.""" -from dataclasses import asdict, dataclass -from enum import Enum -from typing import Optional import re +from dataclasses import asdict, dataclass, field, fields +from enum import Enum +from typing import Dict, List, Optional, Tuple, Union + import vai_q_onnx from onnxruntime.quantization import CalibrationMethod from optimum.configuration_utils import BaseConfig -from dataclasses import dataclass, field, fields -from typing import List, Tuple, Dict, Union class CalibrationMethod(Enum): @@ -206,11 +205,11 @@ class ExtraOptions: @property def snake_to_camel(self): return { - 'qdq_op_type_per_channel_support_to_axis': 'QDQOpTypePerChannelSupportToAxis', - 'ipu_limitation_check': 'IPULimitationCheck', - 'cle_steps': 'CLESteps', - 'cle_total_layer_diff_threshold': 'CLETotalLayerDiffThreshold', - 'cle_scale_append_bias': 'CLEScaleAppendBias' + "qdq_op_type_per_channel_support_to_axis": "QDQOpTypePerChannelSupportToAxis", + "ipu_limitation_check": "IPULimitationCheck", + "cle_steps": "CLESteps", + "cle_total_layer_diff_threshold": "CLETotalLayerDiffThreshold", + "cle_scale_append_bias": "CLEScaleAppendBias", } @property @@ -218,25 +217,28 @@ def camel_to_snake(self): return {value: key for key, value in self.snake_to_camel.items()} def __setattr__(self, name, value): - snake_case_name = self.camel_to_snake.get(name, re.sub(r'([A-Z])', r'_\1', name).lower().lstrip('_')) + snake_case_name = self.camel_to_snake.get(name, re.sub(r"([A-Z])", r"_\1", name).lower().lstrip("_")) super().__setattr__(snake_case_name, value) def __getattr__(self, name): - snake_case_name = self.camel_to_snake.get(name, re.sub(r'([A-Z])', r'_\1', name).lower().lstrip('_')) + snake_case_name = self.camel_to_snake.get(name, re.sub(r"([A-Z])", r"_\1", name).lower().lstrip("_")) return getattr(self, snake_case_name) def to_diff_dict(self, camel_case=False) -> dict: non_default_values = {} for field in fields(self): if camel_case: - name = self.snake_to_camel.get(field.name, ''.join(word.capitalize() for word in field.name.split('_'))) + name = self.snake_to_camel.get( + field.name, "".join(word.capitalize() for word in field.name.split("_")) + ) else: name = field.name if getattr(self, field.name) != field.default and getattr(self, field.name) != {}: non_default_values[name] = getattr(self, field.name) return non_default_values + @dataclass class QuantizationConfig: """ @@ -334,21 +336,21 @@ def __post_init__(self): self.extra_options = ExtraOptions(**self.extra_options) def __setattr__(self, name, value): - if name == 'extra_options' and isinstance(value, dict): - from pdb import set_trace; set_trace() - setattr(self, 'extra_options', ExtraOptions(**value)) + if name == "extra_options" and isinstance(value, dict): + setattr(self, "extra_options", ExtraOptions(**value)) else: super().__setattr__(name, value) def to_diff_dict(self) -> dict: non_default_values = {} for field in fields(self): - if field.name == 'extra_options': + if field.name == "extra_options": extra_options_dict = getattr(self, field.name).to_diff_dict() if extra_options_dict: non_default_values[field.name] = extra_options_dict else: value = getattr(self, field.name) + if value != field.default and value not in ({}, []): if field.name == "execution_providers" and value == ["CPUExecutionProvider"]: continue @@ -361,7 +363,6 @@ def to_diff_dict(self) -> dict: non_default_values[field.name] = value return non_default_values - @staticmethod def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str: return ( @@ -374,7 +375,7 @@ def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType def use_symmetric_calibration(self) -> bool: if self.extra_options: return self.extra_options.activation_symmetric and self.extra_options.weight_symmetric - + return ExtraOptions().activation_symmetric and ExtraOptions().weight_symmetric # def __str__(self): From 591ee628a4656951d415c03264e55793487f28b6 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 12:50:59 +0530 Subject: [PATCH 05/22] removed fpn --- optimum/amd/ryzenai/__init__.py | 4 ++-- optimum/amd/ryzenai/modeling.py | 4 ---- optimum/amd/ryzenai/pipelines/__init__.py | 24 ++--------------------- optimum/amd/ryzenai/quantization.py | 8 +++++--- tests/ryzenai/test_modeling.py | 6 +++--- 5 files changed, 12 insertions(+), 34 deletions(-) diff --git a/optimum/amd/ryzenai/__init__.py b/optimum/amd/ryzenai/__init__.py index 1759411c..fad6453b 100644 --- a/optimum/amd/ryzenai/__init__.py +++ b/optimum/amd/ryzenai/__init__.py @@ -12,7 +12,7 @@ "RyzenAIModel", "RyzenAIModelForCustomTasks", "RyzenAIModelForImageClassification", - "RyzenAIModelForSemanticSegmentation", + "RyzenAIModelForImageSegmentation", "RyzenAIModelForImageToImage", "RyzenAIModelForObjectDetection", ], @@ -29,9 +29,9 @@ RyzenAIModel, RyzenAIModelForCustomTasks, RyzenAIModelForImageClassification, + RyzenAIModelForImageSegmentation, RyzenAIModelForImageToImage, RyzenAIModelForObjectDetection, - RyzenAIModelForSemanticSegmentation, ) from .pipelines import pipeline from .quantization import RyzenAIOnnxQuantizer diff --git a/optimum/amd/ryzenai/modeling.py b/optimum/amd/ryzenai/modeling.py index 4246221f..8ccfc6e9 100644 --- a/optimum/amd/ryzenai/modeling.py +++ b/optimum/amd/ryzenai/modeling.py @@ -644,9 +644,5 @@ class RyzenAIModelForImageSegmentation(RyzenAIModelForObjectDetection): pass -class RyzenAIModelForSemanticSegmentation(RyzenAIModelForObjectDetection): - pass - - class RyzenAIModelForImageToImage(RyzenAIModelForObjectDetection): pass diff --git a/optimum/amd/ryzenai/pipelines/__init__.py b/optimum/amd/ryzenai/pipelines/__init__.py index f4231e21..608c7334 100644 --- a/optimum/amd/ryzenai/pipelines/__init__.py +++ b/optimum/amd/ryzenai/pipelines/__init__.py @@ -10,21 +10,9 @@ from transformers.image_processing_utils import BaseImageProcessor from transformers.onnx.utils import get_preprocessor -from ..modeling import ( - RyzenAIModel, - RyzenAIModelForImageClassification, - RyzenAIModelForObjectDetection, - RyzenAIModelForSemanticSegmentation, -) -from ..models import ( - SemanticFPNImageProcessor, - YoloV3ImageProcessor, - YoloV5ImageProcessor, - YoloV8ImageProcessor, - YoloXImageProcessor, -) +from ..modeling import RyzenAIModel, RyzenAIModelForImageClassification, RyzenAIModelForObjectDetection +from ..models import YoloV3ImageProcessor, YoloV5ImageProcessor, YoloV8ImageProcessor, YoloXImageProcessor from .image_classification import TimmImageClassificationPipeline -from .image_segmentation import ImageSegmentationPipeline from .object_detection import YoloObjectDetectionPipeline @@ -36,7 +24,6 @@ "yolov5": {"preprocessor": YoloV5ImageProcessor, "impl": YoloObjectDetectionPipeline}, "yolov3": {"preprocessor": YoloV3ImageProcessor, "impl": YoloObjectDetectionPipeline}, "yolov8": {"preprocessor": YoloV8ImageProcessor, "impl": YoloObjectDetectionPipeline}, - "semantic_fpn": {"preprocessor": SemanticFPNImageProcessor, "impl": ImageSegmentationPipeline}, } RYZENAI_SUPPORTED_TASKS = { @@ -53,13 +40,6 @@ "type": "image", "model_type": "yolox", }, - "image-segmentation": { - "impl": ImageSegmentationPipeline, - "class": (RyzenAIModelForSemanticSegmentation,), - "default": "amd/SemanticFPN", - "type": "image", - "model_type": "semantic_fpn", - }, } diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 3bfa5194..8797deb6 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -169,9 +169,11 @@ def quantize( weight_type=quantization_config.weight_type, activation_type=quantization_config.activation_type, enable_dpu=quantization_config.enable_dpu, - extra_options=quantization_config.extra_options.to_diff_dict(camel_case=True) - if quantization_config.extra_options - else {}, + extra_options=( + quantization_config.extra_options.to_diff_dict(camel_case=True) + if quantization_config.extra_options + else {} + ), ) LOGGER.info(f"Saved quantized model at: {save_dir}") diff --git a/tests/ryzenai/test_modeling.py b/tests/ryzenai/test_modeling.py index fbd82db3..c086c17c 100644 --- a/tests/ryzenai/test_modeling.py +++ b/tests/ryzenai/test_modeling.py @@ -29,9 +29,9 @@ RyzenAIModel, RyzenAIModelForCustomTasks, RyzenAIModelForImageClassification, + RyzenAIModelForImageSegmentation, RyzenAIModelForImageToImage, RyzenAIModelForObjectDetection, - RyzenAIModelForSemanticSegmentation, pipeline, ) from optimum.utils import ( @@ -210,7 +210,7 @@ def test_pipeline_model_is_none(self): self.assertGreaterEqual(pred["score"], 0.0) -class RyzenAIModelForSemanticSegmentationIntegrationTest(unittest.TestCase, RyzenAITestCaseMixin): +class RyzenAIModelForImageSegmentationIntegrationTest(unittest.TestCase, RyzenAITestCaseMixin): @parameterized.expand(RYZEN_PREQUANTIZED_MODEL_IMAGE_SEGMENTATION) @pytest.mark.prequantized_model_test def test_model(self, model_id): @@ -221,7 +221,7 @@ def test_model(self, model_id): vaip_config = DEFAULT_VAIP_CONFIG outputs_ipu, outputs_cpu = self.prepare_outputs( - model_id, RyzenAIModelForSemanticSegmentation, ort_input, vaip_config, cache_dir, cache_key, file_name + model_id, RyzenAIModelForImageSegmentation, ort_input, vaip_config, cache_dir, cache_key, file_name ) for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): From f0442748053660c29528ab50faae13dff4673411 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 13:06:47 +0530 Subject: [PATCH 06/22] removed fpn --- optimum/amd/ryzenai/configuration.py | 27 +++++++++++++-------------- optimum/amd/ryzenai/quantization.py | 8 ++++---- 2 files changed, 17 insertions(+), 18 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 867590b3..0cd0b40a 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -8,7 +8,6 @@ from typing import Dict, List, Optional, Tuple, Union import vai_q_onnx -from onnxruntime.quantization import CalibrationMethod from optimum.configuration_utils import BaseConfig @@ -227,15 +226,15 @@ def __getattr__(self, name): def to_diff_dict(self, camel_case=False) -> dict: non_default_values = {} - for field in fields(self): + for option in fields(self): if camel_case: name = self.snake_to_camel.get( - field.name, "".join(word.capitalize() for word in field.name.split("_")) + option.name, "".join(word.capitalize() for word in option.name.split("_")) ) else: - name = field.name - if getattr(self, field.name) != field.default and getattr(self, field.name) != {}: - non_default_values[name] = getattr(self, field.name) + name = option.name + if getattr(self, option.name) != option.default and getattr(self, option.name) != {}: + non_default_values[name] = getattr(self, option.name) return non_default_values @@ -343,16 +342,16 @@ def __setattr__(self, name, value): def to_diff_dict(self) -> dict: non_default_values = {} - for field in fields(self): - if field.name == "extra_options": - extra_options_dict = getattr(self, field.name).to_diff_dict() + for option in fields(self): + if option.name == "extra_options": + extra_options_dict = getattr(self, option.name).to_diff_dict() if extra_options_dict: - non_default_values[field.name] = extra_options_dict + non_default_values[option.name] = extra_options_dict else: - value = getattr(self, field.name) + value = getattr(self, option.name) - if value != field.default and value not in ({}, []): - if field.name == "execution_providers" and value == ["CPUExecutionProvider"]: + if value != option.default and value not in ({}, []): + if option.name == "execution_providers" and value == ["CPUExecutionProvider"]: continue if isinstance(value, Enum): @@ -360,7 +359,7 @@ def to_diff_dict(self) -> dict: elif isinstance(value, list): value = [elem.name if isinstance(elem, Enum) else elem for elem in value] - non_default_values[field.name] = value + non_default_values[option.name] = value return non_default_values @staticmethod diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 8797deb6..996b0084 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -164,10 +164,10 @@ def quantize( model_input=Path(self.onnx_model_path).as_posix(), model_output=quantized_model_path.as_posix(), calibration_data_reader=reader, - quant_format=quantization_config.format, - calibrate_method=quantization_config.calibration_method, - weight_type=quantization_config.weight_type, - activation_type=quantization_config.activation_type, + quant_format=quantization_config.format.value, + calibrate_method=quantization_config.calibration_method.value, + weight_type=quantization_config.weight_type.value, + activation_type=quantization_config.activation_type.value, enable_dpu=quantization_config.enable_dpu, extra_options=( quantization_config.extra_options.to_diff_dict(camel_case=True) From 44891a63b22cf4d7d6f25bccb00257d8e498c832 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 13:08:57 +0530 Subject: [PATCH 07/22] add docsttring --- optimum/amd/ryzenai/configuration.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 0cd0b40a..853c6664 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -225,6 +225,9 @@ def __getattr__(self, name): return getattr(self, snake_case_name) def to_diff_dict(self, camel_case=False) -> dict: + """ + Returns a dictionary of non-default values in the configuration. + """ non_default_values = {} for option in fields(self): if camel_case: @@ -341,6 +344,9 @@ def __setattr__(self, name, value): super().__setattr__(name, value) def to_diff_dict(self) -> dict: + """ + Returns a dictionary of non-default values in the configuration. + """ non_default_values = {} for option in fields(self): if option.name == "extra_options": @@ -377,12 +383,12 @@ def use_symmetric_calibration(self) -> bool: return ExtraOptions().activation_symmetric and ExtraOptions().weight_symmetric - # def __str__(self): - # return ( - # f"{self.format} (" - # f"schema: {QuantizationConfig.quantization_type_str(self.activation_type, self.weight_type)}, " - # f"enable_dpu: {self.enable_dpu})" - # ) + def __str__(self): + return ( + f"{self.format} (" + f"schema: {QuantizationConfig.quantization_type_str(self.activation_type, self.weight_type)}, " + f"enable_dpu: {self.enable_dpu})" + ) class AutoQuantizationConfig: From 5bf18185ad91b0bc0458a0ab403d80e1122b8b83 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 13:12:22 +0530 Subject: [PATCH 08/22] updated init models --- optimum/amd/ryzenai/models/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/optimum/amd/ryzenai/models/__init__.py b/optimum/amd/ryzenai/models/__init__.py index bc719b07..21c4210b 100644 --- a/optimum/amd/ryzenai/models/__init__.py +++ b/optimum/amd/ryzenai/models/__init__.py @@ -11,13 +11,11 @@ "yolov5": ["YoloV5ImageProcessor"], "yolov8": ["YoloV8ImageProcessor"], "yolox": ["YoloXImageProcessor"], - "semanticfpn": ["SemanticFPNImageProcessor"], } # Direct imports for type-checking if TYPE_CHECKING: - from .semanticfpn import SemanticFPNImageProcessor from .yolov3 import YoloV3ImageProcessor from .yolov5 import YoloV5ImageProcessor from .yolov8 import YoloV3ImageProcessor From be8c954ef17a913c8b4d5acf73ba575232c7538d Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 13:18:13 +0530 Subject: [PATCH 09/22] updated init models --- optimum/amd/ryzenai/__init__.py | 21 +++++++++++++++++++-- optimum/amd/ryzenai/configuration.py | 6 ++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/optimum/amd/ryzenai/__init__.py b/optimum/amd/ryzenai/__init__.py index fad6453b..a6a8de10 100644 --- a/optimum/amd/ryzenai/__init__.py +++ b/optimum/amd/ryzenai/__init__.py @@ -7,7 +7,16 @@ _import_structure = { - "configuration": ["RyzenAIConfig", "ExtraOptions", "QuantizationConfig", "AutoQuantizationConfig"], + "configuration": [ + "AutoQuantizationConfig", + "CalibrationMethod", + "ExtraOptions", + "QuantFormat", + "QuantizationConfig", + "QuantType", + "AutoQuantizationConfig", + "RyzenAIConfig", + ], "modeling": [ "RyzenAIModel", "RyzenAIModelForCustomTasks", @@ -24,7 +33,15 @@ # Direct imports for type-checking if TYPE_CHECKING: - from .configuration import AutoQuantizationConfig, QuantizationConfig, RyzenAIConfig + from .configuration import ( + AutoQuantizationConfig, + CalibrationMethod, + ExtraOptions, + QuantFormat, + QuantizationConfig, + QuantType, + RyzenAIConfig, + ) from .modeling import ( RyzenAIModel, RyzenAIModelForCustomTasks, diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 853c6664..61100107 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -13,6 +13,8 @@ class CalibrationMethod(Enum): + """CalibrationMethod is an enumeration of the calibration methods supported by RyzenAI quantization.""" + MinMax = vai_q_onnx.CalibrationMethod.MinMax Entropy = vai_q_onnx.CalibrationMethod.Entropy Percentile = vai_q_onnx.CalibrationMethod.Percentile @@ -21,6 +23,8 @@ class CalibrationMethod(Enum): class QuantFormat(Enum): + """QuantFormat is an enumeration of the quantization formats supported by RyzenAI quantization.""" + QOperator = vai_q_onnx.QuantFormat.QOperator QDQ = vai_q_onnx.QuantFormat.QDQ VitisQuantFormat_QDQ = vai_q_onnx.VitisQuantFormat.QDQ @@ -28,6 +32,8 @@ class QuantFormat(Enum): class QuantType(Enum): + """QuantType is an enumeration of the quantization types supported by RyzenAI quantization.""" + QInt8 = vai_q_onnx.QuantType.QInt8 QUInt8 = vai_q_onnx.QuantType.QUInt8 From 617411351f5e3d46f1a12578402f8b74ee4eac42 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 11 Mar 2024 13:21:09 +0530 Subject: [PATCH 10/22] updated docs --- .../ryzenai/package_reference/quantization.mdx | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/source/ryzenai/package_reference/quantization.mdx b/docs/source/ryzenai/package_reference/quantization.mdx index 8dcd8b45..fcda9e88 100644 --- a/docs/source/ryzenai/package_reference/quantization.mdx +++ b/docs/source/ryzenai/package_reference/quantization.mdx @@ -15,3 +15,21 @@ Please refer to the guide [How to apply quantization](https://huggingface.co/doc ### QuantizationConfig [[autodoc]] ryzenai.QuantizationConfig + +### CalibrationMethod + +[[autodoc]] ryzenai.CalibrationMethod + +### QuantFormat + +[[autodoc]] ryzenai.QuantFormat + +### QuantType + +[[autodoc]] ryzenai.QuantType + +### ExtraOptions + +[[autodoc]] ryzenai.ExtraOptions + + From acf90b0357a056e3452d77913b8f49fdcd0e9c56 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 3 Apr 2024 18:13:36 +0530 Subject: [PATCH 11/22] fix config --- optimum/amd/ryzenai/configuration.py | 26 +++++++++++++------------- optimum/amd/ryzenai/quantization.py | 1 + 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 61100107..5a3aafd2 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -406,11 +406,11 @@ def ipu_cnn_config( op_types_to_quantize: Optional[List[str]] = None, extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): - if isinstance(extra_options, ExtraOptions): - extra_options_dict = extra_options.__dict__ - else: - extra_options_dict = extra_options or {} + extra_options = extra_options or {} + if isinstance(extra_options, dict): + extra_options = ExtraOptions(**extra_options) + extra_options_dict = extra_options.__dict__ extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( @@ -419,6 +419,7 @@ def ipu_cnn_config( activation_type=QuantType.QUInt8, weight_type=QuantType.QInt8, enable_dpu=True, + convert_nchw_to_nhwc=True, op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], @@ -433,11 +434,11 @@ def ipu_transformer_config( op_types_to_quantize: Optional[List[str]] = None, extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): - if isinstance(extra_options, ExtraOptions): - extra_options_dict = extra_options.__dict__ - else: - extra_options_dict = extra_options or {} + extra_options = extra_options or {} + if isinstance(extra_options, dict): + extra_options = ExtraOptions(**extra_options) + extra_options_dict = extra_options.__dict__ extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( @@ -459,10 +460,9 @@ def cpu_cnn_config( op_types_to_quantize: Optional[List[str]] = None, extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): - if isinstance(extra_options, ExtraOptions): - extra_options_dict = extra_options.__dict__ - else: - extra_options_dict = extra_options or {} + extra_options = extra_options or {} + if isinstance(extra_options, dict): + extra_options = ExtraOptions(**extra_options) return QuantizationConfig( format=QuantFormat.QDQ, @@ -472,7 +472,7 @@ def cpu_cnn_config( op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], - extra_options=ExtraOptions(**extra_options_dict), + extra_options=extra_options, ) diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 996b0084..a502afab 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -169,6 +169,7 @@ def quantize( weight_type=quantization_config.weight_type.value, activation_type=quantization_config.activation_type.value, enable_dpu=quantization_config.enable_dpu, + convert_nchw_to_nhwc=True, extra_options=( quantization_config.extra_options.to_diff_dict(camel_case=True) if quantization_config.extra_options From 1721308d60830d5f0f376dcf287c815c43a99c1a Mon Sep 17 00:00:00 2001 From: ChaoLi-AMD <125451572+ChaoLi-AMD@users.noreply.github.com> Date: Mon, 27 May 2024 16:49:51 +0800 Subject: [PATCH 12/22] update quantization configurations for ryzenai (vai_q_onnx) (#117) * update quantization configurations for ryzenai * add some combination checks * fix typo * change enable_dpu to enable_ipu_cnn --- optimum/amd/ryzenai/configuration.py | 161 +++++++++++++++++++++------ optimum/amd/ryzenai/quantization.py | 4 +- 2 files changed, 126 insertions(+), 39 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 5a3aafd2..cbda9089 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -5,7 +5,7 @@ import re from dataclasses import asdict, dataclass, field, fields from enum import Enum -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Literal, Optional, Tuple, Union import vai_q_onnx @@ -36,6 +36,12 @@ class QuantType(Enum): QInt8 = vai_q_onnx.QuantType.QInt8 QUInt8 = vai_q_onnx.QuantType.QUInt8 + QUInt16 = vai_q_onnx.VitisQuantType.QUInt16 + QInt16 = vai_q_onnx.VitisQuantType.QInt16 + QUInt32 = vai_q_onnx.VitisQuantType.QUInt32 + QInt32 = vai_q_onnx.VitisQuantType.QInt32 + QFloat16 = vai_q_onnx.VitisQuantType.QFloat16 + QBFloat16 = vai_q_onnx.VitisQuantType.QBFloat16 @dataclass @@ -95,23 +101,23 @@ class ExtraOptions: By default, minmse is calculated using all calibration data. Alternatively, you can set the mode to "MostCommon", where minmse is calculated for each batch separately and take the most common value. convert_bn_to_conv (`bool`, defaults to `True`): - If True, the BatchNormalization operation will be converted to Conv operation when enable_dpu is True. + If True, the BatchNormalization operation will be converted to Conv operation when enable_ipu_cnn is True. convert_reduce_mean_to_global_avg_pool (`bool`, defaults to `True`): - If True, the Reduce Mean operation will be converted to Global Average Pooling operation when enable_dpu is True. + If True, the Reduce Mean operation will be converted to Global Average Pooling operation when enable_ipu_cnn is True. split_large_kernel_pool (`bool`, defaults to `True`): If True, the large kernel Global Average Pooling operation will be split into multiple Average Pooling operation when - enable_dpu is True. + enable_ipu_cnn is True. convert_split_to_slice (`bool`, defaults to `True`): - If True, the Split operation will be converted to Slice operation when enable_dpu is True. + If True, the Split operation will be converted to Slice operation when enable_ipu_cnn is True. fuse_instance_norm (`bool`, defaults to `False`): - If True, the split instance norm operation will be fused to InstanceNorm operation when enable_dpu is True. + If True, the split instance norm operation will be fused to InstanceNorm operation when enable_ipu_cnn is True. fuse_l2_norm (`bool`, defaults to `False`): - If True, a set of L2norm operations will be fused to L2Norm operation when enable_dpu is True. + If True, a set of L2norm operations will be fused to L2Norm operation when enable_ipu_cnn is True. convert_clip_to_relu (`bool`, defaults to `False`): If True, the Clip operations that have a min value of 0 will be converted to ReLU operations. simulate_dpu (`bool`, defaults to `True`): If True, a simulation transformation that replaces some operations with an approximate implementation will be applied - for DPU when enable_dpu is True. + for DPU when enable_ipu_cnn is True. convert_leaky_relu_to_dpu_version (`bool`, defaults to `True`): If True, the Leaky Relu operation will be converted to DPU version when SimulateDPU is True. convert_sigmoid_to_hard_sigmoid (`bool`, defaults to `True`): @@ -153,9 +159,9 @@ class ExtraOptions: cle_scale_append_bias (`bool`, defaults to `True`): Whether the bias be included when calculating the scale of the weights. remove_qdq_conv_leaky_relu (`bool`, defaults to `False`): - If True, the QDQ between Conv and LeakyRelu will be removed for DPU when enable_dpu is True. + If True, the QDQ between Conv and LeakyRelu will be removed for DPU when enable_ipu_cnn is True. remove_qdq_conv_prelu (`bool`, defaults to `False`): - If True, the QDQ between Conv and PRelu will be removed for DPU when enable_dpu is True. + If True, the QDQ between Conv and PRelu will be removed for DPU when enable_ipu_cnn is True. """ activation_symmetric: bool = False @@ -278,7 +284,7 @@ class QuantizationConfig: The quantization data type to use for the activations. weights_dtype (QuantType, defaults to `QuantType.QInt8`): The quantization data type to use for the weights. - enable_dpu (bool, defaults to `True`): + enable_ipu_cnn (bool, defaults to `True`): Flag to generate a quantized model suitable for DPU/NPU computations. If True, the quantization process will consider specific limitations and requirements of DPU/NPU, optimizing the model accordingly. input_nodes (List[str], defaults to an empty list `[]`): @@ -318,22 +324,22 @@ class QuantizationConfig: Contains key-value pairs for various options in different cases. """ - format: QuantFormat = QuantFormat.QDQ - calibration_method: Union[CalibrationMethod, str] = CalibrationMethod.MinMSE + format: Literal["qdq", "qop", "vitisqdq"] = "qdq" + calibration_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "mse" input_nodes: List[str] = field(default_factory=list) output_nodes: List[str] = field(default_factory=list) op_types_to_quantize: List[str] = field(default_factory=list) random_data_reader_input_shape: Union[List[int], Tuple[int], Dict[str, List[int]]] = field(default_factory=list) per_channel: bool = False reduce_range: bool = False - activation_type: QuantType = QuantType.QInt8 - weight_type: QuantType = QuantType.QInt8 + activations_dtype: Literal["uint8", "int8", "uint16", "int16", "uint32", "int32", "bfloat16", "float16"] = "uint8" + weights_dtype: Literal["uint8", "int8", "uint16", "int16", "uint32", "int32", "bfloat16", "float16"] = "int8" nodes_to_quantize: List[str] = field(default_factory=list) nodes_to_exclude: List[str] = field(default_factory=list) optimize_model: bool = True use_external_data_format: bool = False execution_providers: List[str] = field(default_factory=lambda: ["CPUExecutionProvider"]) - enable_dpu: bool = False + enable_ipu_cnn: bool = False convert_fp16_to_fp32: bool = False convert_nchw_to_nhwc: bool = False include_cle: bool = False @@ -342,6 +348,28 @@ class QuantizationConfig: def __post_init__(self): if isinstance(self.extra_options, dict): self.extra_options = ExtraOptions(**self.extra_options) + self.format = self._map_format(self.format) + self.calibration_method = self._map_calibration_method(self.calibration_method) + self.activations_dtype, self.weights_dtype = self._map_dtypes(self.activations_dtype, self.weights_dtype) + + self.check_dtype_and_format(self.activations_dtype, "activations_dtype", self.format) + self.check_dtype_and_format(self.weights_dtype, "weights_dtype", self.format) + + if self.enable_ipu_cnn: + if self.format not in ["qdq"]: + raise ValueError('ipu cnn configuration only support format "qdq".') + if self.calibration_method not in ["nonoverflow", "mse"]: + raise ValueError('ipu cnn configuration only support calibration_method "nonoverflow" and "mse".') + if not (self.extra_options.activation_symmetric and self.extra_options.weight_symmetric): + raise ValueError( + "ipu cnn configuration requires setting activation_symmetric and weight_symmetric to true." + ) + if self.activations_dtype not in ["uint8", "int8"]: + raise ValueError('ipu cnn configuration only support activations_dtype "uint8" and "int8".') + if self.weights_dtype not in ["int8"]: + raise ValueError('ipu cnn configuration only support weights_dtype "int8".') + if self.per_channel: + raise ValueError("ipu cnn configuration only supports per tensor.") def __setattr__(self, name, value): if name == "extra_options" and isinstance(value, dict): @@ -375,12 +403,71 @@ def to_diff_dict(self) -> dict: return non_default_values @staticmethod - def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str: - return ( - f"{'s8' if activations_dtype == QuantType.QInt8 else 'u8'}" - f"/" - f"{'s8' if weights_dtype == QuantType.QInt8 else 'u8'}" - ) + def check_dtype_and_format(dtype, dtype_name, format): + if dtype not in ["uint8", "int8"] and format not in ["vitisqdq"]: + raise ValueError(f'{dtype_name} is: "{dtype}", format must be "vitisqdq".') + + @staticmethod + def _map_format(format_str): + mapping = { + "qdq": QuantFormat.QDQ, + "qop": QuantFormat.QOperator, + "vitisqdq": QuantFormat.VitisQuantFormat_QDQ, + } + return QuantizationConfig._map_value(mapping, format_str, "format") + + @staticmethod + def _map_calibration_method(method_str): + mapping = { + "nonoverflow": CalibrationMethod.NonOverflow, + "mse": CalibrationMethod.MinMSE, + "minmax": CalibrationMethod.MinMax, + "entropy": CalibrationMethod.Entropy, + "percentile": CalibrationMethod.Percentile, + } + return QuantizationConfig._map_value(mapping, method_str, "calibration method") + + @staticmethod + def _map_dtypes(activations_dtype_str, weights_dtype_str): + mapping = { + "uint8": QuantType.QUInt8, + "int8": QuantType.QInt8, + "uint16": QuantType.QUInt16, + "int16": QuantType.QInt16, + "uint32": QuantType.QUInt32, + "int32": QuantType.QInt32, + "float16": QuantType.QFloat16, + "bfloat16": QuantType.QBFloat16, + } + activations_dtype = QuantizationConfig._map_value(mapping, activations_dtype_str, "activations dtype") + weights_dtype = QuantizationConfig._map_value(mapping, weights_dtype_str, "weights dtype") + return activations_dtype, weights_dtype + + @staticmethod + def _map_value(mapping, value, name): + try: + return mapping[value] + except KeyError: + valid_values = ", ".join(f'"{v}"' for v in mapping.keys()) + raise ValueError(f'{name} only supports the following values: {valid_values}. Received "{value}".') + + @staticmethod + def quantization_type_str(activations_dtype, weights_dtype) -> str: + str_mapping = { + QuantType.QUInt8: "u8", + QuantType.QInt8: "s8", + QuantType.QUInt16: "u16", + QuantType.QInt16: "s16", + QuantType.QUInt32: "u32", + QuantType.QInt32: "s32", + QuantType.QFloat16: "f16", + QuantType.QBFloat16: "bf16", + } + activations_str = str_mapping.get(activations_dtype) + weights_str = str_mapping.get(weights_dtype) + if activations_str is None or weights_str is None: + raise ValueError("Unsupported quantization type") + return f"{activations_str}/{weights_str}" @property def use_symmetric_calibration(self) -> bool: @@ -393,14 +480,14 @@ def __str__(self): return ( f"{self.format} (" f"schema: {QuantizationConfig.quantization_type_str(self.activation_type, self.weight_type)}, " - f"enable_dpu: {self.enable_dpu})" + f"enable_ipu_cnn: {self.enable_ipu_cnn})" ) class AutoQuantizationConfig: @staticmethod def ipu_cnn_config( - calibrate_method: CalibrationMethod = CalibrationMethod.MinMSE, + calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "mse", nodes_to_quantize: Optional[List[str]] = None, nodes_to_exclude: Optional[List[str]] = None, op_types_to_quantize: Optional[List[str]] = None, @@ -414,12 +501,12 @@ def ipu_cnn_config( extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( - format=QuantFormat.QDQ, + format="qdq", calibration_method=calibrate_method, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, - enable_dpu=True, - convert_nchw_to_nhwc=True, + activations_dtype="uint8", + weights_dtype="int8", + enable_ipu_cnn=True, + op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], @@ -428,7 +515,7 @@ def ipu_cnn_config( @staticmethod def ipu_transformer_config( - calibrate_method: CalibrationMethod = CalibrationMethod.MinMax, + calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax", nodes_to_quantize: Optional[List[str]] = None, nodes_to_exclude: Optional[List[str]] = None, op_types_to_quantize: Optional[List[str]] = None, @@ -442,10 +529,10 @@ def ipu_transformer_config( extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( - format=QuantFormat.QDQ, + format="qdq", calibration_method=calibrate_method, - activation_type=QuantType.QInt8, - weight_type=QuantType.QInt8, + activations_dtype="int8", + weights_dtype="int8", op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], @@ -454,7 +541,7 @@ def ipu_transformer_config( @staticmethod def cpu_cnn_config( - calibrate_method: CalibrationMethod = CalibrationMethod.MinMax, + calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax", nodes_to_quantize: Optional[List[str]] = None, nodes_to_exclude: Optional[List[str]] = None, op_types_to_quantize: Optional[List[str]] = None, @@ -465,10 +552,10 @@ def cpu_cnn_config( extra_options = ExtraOptions(**extra_options) return QuantizationConfig( - format=QuantFormat.QDQ, + format="qdq", calibration_method=calibrate_method, - activation_type=QuantType.QUInt8, - weight_type=QuantType.QInt8, + activations_dtype="uint8", + weights_dtype="int8", op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index a502afab..3417b645 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -168,8 +168,8 @@ def quantize( calibrate_method=quantization_config.calibration_method.value, weight_type=quantization_config.weight_type.value, activation_type=quantization_config.activation_type.value, - enable_dpu=quantization_config.enable_dpu, - convert_nchw_to_nhwc=True, + enable_ipu_cnn=quantization_config.enable_ipu_cnn, + extra_options=( quantization_config.extra_options.to_diff_dict(camel_case=True) if quantization_config.extra_options From db3ca0008e55a186ab7e08555b64240ba42acdb1 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 31 May 2024 15:11:42 +0530 Subject: [PATCH 13/22] fix style --- optimum/amd/ryzenai/configuration.py | 1 - optimum/amd/ryzenai/quantization.py | 1 - tests/brevitas/test_onnx_export.py | 2 +- tests/brevitas/test_quantization.py | 5 +++-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index cbda9089..6edb4567 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -506,7 +506,6 @@ def ipu_cnn_config( activations_dtype="uint8", weights_dtype="int8", enable_ipu_cnn=True, - op_types_to_quantize=op_types_to_quantize, nodes_to_quantize=nodes_to_quantize or [], nodes_to_exclude=nodes_to_exclude or [], diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 4a09cb70..68da41a4 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -173,7 +173,6 @@ def quantize( weight_type=quantization_config.weight_type.value, activation_type=quantization_config.activation_type.value, enable_ipu_cnn=quantization_config.enable_ipu_cnn, - extra_options=( quantization_config.extra_options.to_diff_dict(camel_case=True) if quantization_config.extra_options diff --git a/tests/brevitas/test_onnx_export.py b/tests/brevitas/test_onnx_export.py index 836d21b8..f92bdb32 100644 --- a/tests/brevitas/test_onnx_export.py +++ b/tests/brevitas/test_onnx_export.py @@ -9,11 +9,11 @@ import onnx import torch -from brevitas.export.onnx.standard.qcdq.manager import StdQCDQONNXManager from brevitas_examples.llm.llm_quant.export import brevitas_proxy_export_mode from parameterized import parameterized from testing_utils import SUPPORTED_MODELS_TINY, VALIDATE_EXPORT_ON_SHAPES, get_quantized_model +from brevitas.export.onnx.standard.qcdq.manager import StdQCDQONNXManager from optimum.amd.brevitas.export import find_and_insert_matmulinteger from optimum.exporters import TasksManager from optimum.exporters.onnx import ( diff --git a/tests/brevitas/test_quantization.py b/tests/brevitas/test_quantization.py index e37992f1..91a81ec0 100644 --- a/tests/brevitas/test_quantization.py +++ b/tests/brevitas/test_quantization.py @@ -4,11 +4,12 @@ import unittest import torch -from brevitas.nn.quant_linear import QuantLinear -from brevitas.proxy.runtime_quant import ActQuantProxyFromInjector, DynamicActQuantProxyFromInjector from parameterized import parameterized from testing_utils import SUPPORTED_MODELS_TINY, get_quantized_model +from brevitas.nn.quant_linear import QuantLinear +from brevitas.proxy.runtime_quant import ActQuantProxyFromInjector, DynamicActQuantProxyFromInjector + def _get_all_model_ids(model_type: str): if isinstance(SUPPORTED_MODELS_TINY[model_type], str): From 0d8b1a37ad4fdf48d61d4b210856e015a79f30ff Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Fri, 31 May 2024 16:46:56 +0530 Subject: [PATCH 14/22] fix options --- optimum/amd/ryzenai/configuration.py | 37 +++++++++++++++++++--------- optimum/amd/ryzenai/quantization.py | 18 ++++++++++++-- 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 6edb4567..02765559 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -173,7 +173,7 @@ class ExtraOptions: force_quantize_no_input_check: bool = False matmul_const_b_only: bool = False add_qdq_pair_to_weight: bool = False - op_types_to_exclude_output_quantization: Union[List[str], None] = None + op_types_to_exclude_output_quantization: Union[List[str], None] = field(default_factory=list) dedicated_qdq_pair: bool = False qdq_op_type_per_channel_support_to_axis: Dict = field(default_factory=dict) use_qdq_vitis_custom_ops: bool = True @@ -248,7 +248,11 @@ def to_diff_dict(self, camel_case=False) -> dict: ) else: name = option.name - if getattr(self, option.name) != option.default and getattr(self, option.name) != {}: + if ( + getattr(self, option.name) != option.default + and getattr(self, option.name) != {} + and getattr(self, option.name) != [] + ): non_default_values[name] = getattr(self, option.name) return non_default_values @@ -300,9 +304,9 @@ class QuantizationConfig: Determines whether weights should be quantized per channel. Must be False for DPU/NPU devices. reduce_range (bool, defaults to `False`): If True, quantizes weights with 7-bits. Must be False for DPU/NPU devices. - activation_type (QuantType, defaults to `QuantType.QInt8`): + activations_dtype (QuantType, defaults to `QuantType.QInt8`): Specifies the quantization data type for activations. - weight_type (QuantType, defaults to `QuantType.QInt8`): + weights_dtype (QuantType, defaults to `QuantType.QInt8`): Specifies the quantization data type for weights. Must be `QuantType.QInt8` for NPU devices. nodes_to_quantize (List[str], defaults to an empty list `[]`): If specified, only the nodes in this list are quantized. @@ -348,9 +352,9 @@ class QuantizationConfig: def __post_init__(self): if isinstance(self.extra_options, dict): self.extra_options = ExtraOptions(**self.extra_options) - self.format = self._map_format(self.format) - self.calibration_method = self._map_calibration_method(self.calibration_method) - self.activations_dtype, self.weights_dtype = self._map_dtypes(self.activations_dtype, self.weights_dtype) + + if self.calibration_method in {"mse", "overflow"}: + self.extra_options.calib_tensor_range_symmetric = True self.check_dtype_and_format(self.activations_dtype, "activations_dtype", self.format) self.check_dtype_and_format(self.weights_dtype, "weights_dtype", self.format) @@ -377,6 +381,16 @@ def __setattr__(self, name, value): else: super().__setattr__(name, value) + def __getattr__(self, name): + value = getattr(self, name) + if name == "format": + value = self._map_format(value) + elif name == "calibration_method": + value = self._map_calibration_method(value) + elif name in ["activations_dtype", "weights_dtype"]: + value = self._map_dtypes(value, name) + return value + def to_diff_dict(self) -> dict: """ Returns a dictionary of non-default values in the configuration. @@ -428,7 +442,7 @@ def _map_calibration_method(method_str): return QuantizationConfig._map_value(mapping, method_str, "calibration method") @staticmethod - def _map_dtypes(activations_dtype_str, weights_dtype_str): + def _map_dtypes(dtype, name): mapping = { "uint8": QuantType.QUInt8, "int8": QuantType.QInt8, @@ -439,9 +453,8 @@ def _map_dtypes(activations_dtype_str, weights_dtype_str): "float16": QuantType.QFloat16, "bfloat16": QuantType.QBFloat16, } - activations_dtype = QuantizationConfig._map_value(mapping, activations_dtype_str, "activations dtype") - weights_dtype = QuantizationConfig._map_value(mapping, weights_dtype_str, "weights dtype") - return activations_dtype, weights_dtype + dtype = QuantizationConfig._map_value(mapping, dtype, name) + return dtype @staticmethod def _map_value(mapping, value, name): @@ -479,7 +492,7 @@ def use_symmetric_calibration(self) -> bool: def __str__(self): return ( f"{self.format} (" - f"schema: {QuantizationConfig.quantization_type_str(self.activation_type, self.weight_type)}, " + f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, " f"enable_ipu_cnn: {self.enable_ipu_cnn})" ) diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 68da41a4..2d01c0e0 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -170,9 +170,23 @@ def quantize( calibration_data_reader=reader, quant_format=quantization_config.format.value, calibrate_method=quantization_config.calibration_method.value, - weight_type=quantization_config.weight_type.value, - activation_type=quantization_config.activation_type.value, + input_nodes=quantization_config.input_nodes, + output_nodes=quantization_config.output_nodes, + op_types_to_quantize=quantization_config.op_types_to_quantize, + random_data_reader_input_shape=quantization_config.random_data_reader_input_shape, + per_channel=quantization_config.per_channel, + reduce_range=quantization_config.reduce_range, + activation_type=quantization_config.activations_dtype.value, + weight_type=quantization_config.weights_dtype.value, + nodes_to_quantize=quantization_config.nodes_to_quantize, + nodes_to_exclude=quantization_config.nodes_to_exclude, + optimize_model=quantization_config.optimize_model, + use_external_data_format=quantization_config.use_external_data_format, + execution_providers=quantization_config.execution_providers, enable_ipu_cnn=quantization_config.enable_ipu_cnn, + convert_fp16_to_fp32=quantization_config.convert_fp16_to_fp32, + convert_nchw_to_nhwc=quantization_config.convert_nchw_to_nhwc, + include_cle=quantization_config.include_cle, extra_options=( quantization_config.extra_options.to_diff_dict(camel_case=True) if quantization_config.extra_options From 4385b77d56f32534728913dd880eb625fc24ce14 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Mon, 3 Jun 2024 18:12:47 +0530 Subject: [PATCH 15/22] fix options --- optimum/amd/ryzenai/configuration.py | 43 +++++++++++++++------------- optimum/amd/ryzenai/quantization.py | 13 ++++++--- 2 files changed, 32 insertions(+), 24 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 02765559..057d97cf 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -421,17 +421,15 @@ def check_dtype_and_format(dtype, dtype_name, format): if dtype not in ["uint8", "int8"] and format not in ["vitisqdq"]: raise ValueError(f'{dtype_name} is: "{dtype}", format must be "vitisqdq".') - @staticmethod - def _map_format(format_str): + def map_format(self): mapping = { "qdq": QuantFormat.QDQ, "qop": QuantFormat.QOperator, "vitisqdq": QuantFormat.VitisQuantFormat_QDQ, } - return QuantizationConfig._map_value(mapping, format_str, "format") + return QuantizationConfig._map_value(mapping, self.format, "format") - @staticmethod - def _map_calibration_method(method_str): + def map_calibration_method(self): mapping = { "nonoverflow": CalibrationMethod.NonOverflow, "mse": CalibrationMethod.MinMSE, @@ -439,10 +437,10 @@ def _map_calibration_method(method_str): "entropy": CalibrationMethod.Entropy, "percentile": CalibrationMethod.Percentile, } - return QuantizationConfig._map_value(mapping, method_str, "calibration method") + return QuantizationConfig._map_value(mapping, self.calibration_method, "calibration method") - @staticmethod - def _map_dtypes(dtype, name): + @property + def _dtype_mapping(self): mapping = { "uint8": QuantType.QUInt8, "int8": QuantType.QInt8, @@ -453,8 +451,13 @@ def _map_dtypes(dtype, name): "float16": QuantType.QFloat16, "bfloat16": QuantType.QBFloat16, } - dtype = QuantizationConfig._map_value(mapping, dtype, name) - return dtype + return mapping + + def map_activations_dtype(self): + return QuantizationConfig._map_value(self._dtype_mapping, self.activations_dtype, "Activations dtype") + + def map_weights_dtype(self): + return QuantizationConfig._map_value(self._dtype_mapping, self.weights_dtype, "Weights dtype") @staticmethod def _map_value(mapping, value, name): @@ -501,16 +504,16 @@ class AutoQuantizationConfig: @staticmethod def ipu_cnn_config( calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "mse", - nodes_to_quantize: Optional[List[str]] = None, - nodes_to_exclude: Optional[List[str]] = None, - op_types_to_quantize: Optional[List[str]] = None, + nodes_to_quantize: List[str] = [], + nodes_to_exclude: List[str] = [], + op_types_to_quantize: List[str] = [], extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): extra_options = extra_options or {} if isinstance(extra_options, dict): extra_options = ExtraOptions(**extra_options) - extra_options_dict = extra_options.__dict__ + extra_options_dict = extra_options.to_diff_dict() extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( @@ -528,9 +531,9 @@ def ipu_cnn_config( @staticmethod def ipu_transformer_config( calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax", - nodes_to_quantize: Optional[List[str]] = None, - nodes_to_exclude: Optional[List[str]] = None, - op_types_to_quantize: Optional[List[str]] = None, + nodes_to_quantize: List[str] = [], + nodes_to_exclude: List[str] = [], + op_types_to_quantize: List[str] = [], extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): extra_options = extra_options or {} @@ -554,9 +557,9 @@ def ipu_transformer_config( @staticmethod def cpu_cnn_config( calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax", - nodes_to_quantize: Optional[List[str]] = None, - nodes_to_exclude: Optional[List[str]] = None, - op_types_to_quantize: Optional[List[str]] = None, + nodes_to_quantize: List[str] = [], + nodes_to_exclude: List[str] = [], + op_types_to_quantize: List[str] = [], extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None, ): extra_options = extra_options or {} diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 2d01c0e0..7be3c294 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -164,20 +164,25 @@ def quantize( LOGGER.info("Quantizing model...") + format = quantization_config.map_format().value + calibration_method = quantization_config.map_calibration_method().value + activation_type = quantization_config.map_activations_dtype().value + weight_type = quantization_config.map_weights_dtype().value + quantize_static( model_input=Path(self.onnx_model_path).as_posix(), model_output=quantized_model_path.as_posix(), calibration_data_reader=reader, - quant_format=quantization_config.format.value, - calibrate_method=quantization_config.calibration_method.value, + quant_format=format, + calibrate_method=calibration_method, input_nodes=quantization_config.input_nodes, output_nodes=quantization_config.output_nodes, op_types_to_quantize=quantization_config.op_types_to_quantize, random_data_reader_input_shape=quantization_config.random_data_reader_input_shape, per_channel=quantization_config.per_channel, reduce_range=quantization_config.reduce_range, - activation_type=quantization_config.activations_dtype.value, - weight_type=quantization_config.weights_dtype.value, + activation_type=activation_type, + weight_type=weight_type, nodes_to_quantize=quantization_config.nodes_to_quantize, nodes_to_exclude=quantization_config.nodes_to_exclude, optimize_model=quantization_config.optimize_model, From 94b3f805cf1dba94de5de07ce3947ace86475f61 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 4 Jun 2024 17:16:11 +0530 Subject: [PATCH 16/22] add tests --- optimum/amd/ryzenai/configuration.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 057d97cf..54c87a92 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -523,8 +523,8 @@ def ipu_cnn_config( weights_dtype="int8", enable_ipu_cnn=True, op_types_to_quantize=op_types_to_quantize, - nodes_to_quantize=nodes_to_quantize or [], - nodes_to_exclude=nodes_to_exclude or [], + nodes_to_quantize=nodes_to_quantize, + nodes_to_exclude=nodes_to_exclude, extra_options=ExtraOptions(**extra_options_dict), ) @@ -540,7 +540,7 @@ def ipu_transformer_config( if isinstance(extra_options, dict): extra_options = ExtraOptions(**extra_options) - extra_options_dict = extra_options.__dict__ + extra_options_dict = extra_options.to_diff_dict() extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True) return QuantizationConfig( @@ -549,8 +549,8 @@ def ipu_transformer_config( activations_dtype="int8", weights_dtype="int8", op_types_to_quantize=op_types_to_quantize, - nodes_to_quantize=nodes_to_quantize or [], - nodes_to_exclude=nodes_to_exclude or [], + nodes_to_quantize=nodes_to_quantize, + nodes_to_exclude=nodes_to_exclude, extra_options=ExtraOptions(**extra_options_dict), ) @@ -572,8 +572,8 @@ def cpu_cnn_config( activations_dtype="uint8", weights_dtype="int8", op_types_to_quantize=op_types_to_quantize, - nodes_to_quantize=nodes_to_quantize or [], - nodes_to_exclude=nodes_to_exclude or [], + nodes_to_quantize=nodes_to_quantize, + nodes_to_exclude=nodes_to_exclude, extra_options=extra_options, ) From 4684dea0e97dd332a3b0daab15fe74c7cfec405a Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 4 Jun 2024 17:44:32 +0530 Subject: [PATCH 17/22] add config options --- tests/ryzenai/test_configuration.py | 173 ++++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 tests/ryzenai/test_configuration.py diff --git a/tests/ryzenai/test_configuration.py b/tests/ryzenai/test_configuration.py new file mode 100644 index 00000000..015f049e --- /dev/null +++ b/tests/ryzenai/test_configuration.py @@ -0,0 +1,173 @@ +import unittest + +from parameterized import parameterized + +from optimum.amd.ryzenai import AutoQuantizationConfig, ExtraOptions, QuantizationConfig + + +class TestExtraOptions(unittest.TestCase): + def test_default_values(self): + options = ExtraOptions() + self.assertEqual(options.activation_symmetric, False) + self.assertEqual(options.weight_symmetric, True) + self.assertEqual(options.use_unsigned_relu, False) + self.assertEqual(options.quantize_bias, True) + self.assertEqual(options.remove_input_init, True) + self.assertEqual(options.enable_subgraph, False) + self.assertEqual(options.force_quantize_no_input_check, False) + self.assertEqual(options.matmul_const_b_only, False) + self.assertEqual(options.add_qdq_pair_to_weight, False) + self.assertEqual(options.op_types_to_exclude_output_quantization, []) + self.assertEqual(options.dedicated_qdq_pair, False) + self.assertEqual(options.qdq_op_type_per_channel_support_to_axis, {}) + self.assertEqual(options.use_qdq_vitis_custom_ops, True) + self.assertEqual(options.calib_tensor_range_symmetric, False) + self.assertEqual(options.calib_moving_average, False) + self.assertEqual(options.calib_moving_average_constant, 0.01) + self.assertEqual(options.random_data_reader_input_data_range, None) + self.assertEqual(options.int16_scale, False) + self.assertEqual(options.min_mse_mode, "All") + self.assertEqual(options.convert_bn_to_conv, True) + self.assertEqual(options.convert_reduce_mean_to_global_avg_pool, True) + self.assertEqual(options.split_large_kernel_pool, True) + self.assertEqual(options.convert_split_to_slice, True) + self.assertEqual(options.fuse_instance_norm, False) + self.assertEqual(options.fuse_l2_norm, False) + self.assertEqual(options.convert_clip_to_relu, False) + self.assertEqual(options.simulate_dpu, True) + self.assertEqual(options.convert_leaky_relu_to_dpu_version, True) + self.assertEqual(options.convert_sigmoid_to_hard_sigmoid, True) + self.assertEqual(options.convert_hard_sigmoid_to_dpu_version, True) + self.assertEqual(options.convert_avg_pool_to_dpu_version, True) + self.assertEqual(options.convert_reduce_mean_to_dpu_version, True) + self.assertEqual(options.convert_softmax_to_dpu_version, False) + self.assertEqual(options.ipu_limitation_check, True) + self.assertEqual(options.adjust_shift_cut, True) + self.assertEqual(options.adjust_shift_bias, True) + self.assertEqual(options.adjust_shift_read, True) + self.assertEqual(options.adjust_shift_write, True) + self.assertEqual(options.adjust_hard_sigmoid, True) + self.assertEqual(options.adjust_shift_swish, True) + self.assertEqual(options.align_concat, True) + self.assertEqual(options.align_pool, True) + self.assertEqual(options.replace_clip6_relu, False) + self.assertEqual(options.cle_steps, 1) + self.assertEqual(options.cle_total_layer_diff_threshold, 2e-7) + self.assertEqual(options.cle_scale_append_bias, True) + self.assertEqual(options.remove_qdq_conv_leaky_relu, False) + self.assertEqual(options.remove_qdq_conv_prelu, False) + + def test_snake_to_camel(self): + options = ExtraOptions() + camel_case_dict = options.snake_to_camel + self.assertEqual( + camel_case_dict["qdq_op_type_per_channel_support_to_axis"], "QDQOpTypePerChannelSupportToAxis" + ) + self.assertEqual(camel_case_dict["ipu_limitation_check"], "IPULimitationCheck") + self.assertEqual(camel_case_dict["cle_steps"], "CLESteps") + self.assertEqual(camel_case_dict["cle_total_layer_diff_threshold"], "CLETotalLayerDiffThreshold") + self.assertEqual(camel_case_dict["cle_scale_append_bias"], "CLEScaleAppendBias") + + def test_camel_to_snake_setattr(self): + options = ExtraOptions() + options.QDQOpTypePerChannelSupportToAxis = "some_value" + options.IPULimitationCheck = False + options.CLESteps = 5 + options.CLETotalLayerDiffThreshold = 1e-7 + options.CLEScaleAppendBias = False + self.assertEqual(options.qdq_op_type_per_channel_support_to_axis, "some_value") + self.assertFalse(options.ipu_limitation_check) + self.assertEqual(options.cle_steps, 5) + self.assertEqual(options.cle_total_layer_diff_threshold, 1e-7) + self.assertFalse(options.cle_scale_append_bias) + + def test_to_diff_dict(self): + options = ExtraOptions(activation_symmetric=True) + diff_dict = options.to_diff_dict() + self.assertEqual(diff_dict, {"activation_symmetric": True}) + + @parameterized.expand( + [ + ("activation_symmetric", False, True), + ("weight_symmetric", True, False), + ("use_unsigned_relu", False, True), + ] + ) + def test_parametric_setting_attributes(self, attribute, default_value, new_value): + options = ExtraOptions() + self.assertEqual(getattr(options, attribute), default_value) + setattr(options, attribute, new_value) + self.assertEqual(getattr(options, attribute), new_value) + + +class TestQuantizationConfig(unittest.TestCase): + def test_default_values(self): + config = QuantizationConfig() + self.assertEqual(config.format, "qdq") + self.assertEqual(config.calibration_method, "mse") + self.assertEqual(config.input_nodes, []) + self.assertEqual(config.output_nodes, []) + self.assertEqual(config.op_types_to_quantize, []) + self.assertEqual(config.random_data_reader_input_shape, []) + self.assertFalse(config.per_channel) + self.assertFalse(config.reduce_range) + self.assertEqual(config.activations_dtype, "uint8") + self.assertEqual(config.weights_dtype, "int8") + self.assertEqual(config.nodes_to_quantize, []) + self.assertEqual(config.nodes_to_exclude, []) + self.assertTrue(config.optimize_model) + self.assertFalse(config.use_external_data_format) + self.assertEqual(config.execution_providers, ["CPUExecutionProvider"]) + self.assertFalse(config.enable_ipu_cnn) + self.assertFalse(config.convert_fp16_to_fp32) + self.assertFalse(config.convert_nchw_to_nhwc) + self.assertFalse(config.include_cle) + self.assertIsInstance(config.extra_options, ExtraOptions) + + def test_extra_options_initialization(self): + extra_options = ExtraOptions(activation_symmetric=True) + config = QuantizationConfig(extra_options=extra_options) + self.assertEqual(config.extra_options.activation_symmetric, True) + + def test_to_diff_dict(self): + config = QuantizationConfig(format="qop", calibration_method="entropy") + diff_dict = config.to_diff_dict() + self.assertEqual(diff_dict["format"], "qop") + self.assertEqual(diff_dict["calibration_method"], "entropy") + + def test_use_symmetric_calibration(self): + config = QuantizationConfig(extra_options=ExtraOptions(activation_symmetric=True, weight_symmetric=True)) + self.assertTrue(config.use_symmetric_calibration) + + @parameterized.expand( + [ + ("format", "qdq", "qop"), + ("calibration_method", "mse", "entropy"), + ("activations_dtype", "uint8", "int8"), + ("weights_dtype", "int8", "uint8"), + ] + ) + def test_parametric_setting_attributes(self, attribute, default_value, new_value): + config = QuantizationConfig() + self.assertEqual(getattr(config, attribute), default_value) + setattr(config, attribute, new_value) + self.assertEqual(getattr(config, attribute), new_value) + + +class TestAutoQuantizationConfig(unittest.TestCase): + def test_ipu_cnn_config(self): + config = AutoQuantizationConfig.ipu_cnn_config() + self.assertEqual(config.format, "qdq") + self.assertEqual(config.calibration_method, "mse") + self.assertTrue(config.extra_options.activation_symmetric) + + def test_ipu_transformer_config(self): + config = AutoQuantizationConfig.ipu_transformer_config() + self.assertEqual(config.format, "qdq") + self.assertEqual(config.calibration_method, "minmax") + self.assertTrue(config.extra_options.activation_symmetric) + + def test_cpu_cnn_config(self): + config = AutoQuantizationConfig.cpu_cnn_config() + self.assertEqual(config.format, "qdq") + self.assertEqual(config.calibration_method, "minmax") From 8cc538f70e99affd4e67c768c6dc08c7cf540f64 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 4 Jun 2024 17:49:13 +0530 Subject: [PATCH 18/22] fix style --- tests/brevitas/test_onnx_export.py | 2 +- tests/brevitas/test_quantization.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tests/brevitas/test_onnx_export.py b/tests/brevitas/test_onnx_export.py index f92bdb32..836d21b8 100644 --- a/tests/brevitas/test_onnx_export.py +++ b/tests/brevitas/test_onnx_export.py @@ -9,11 +9,11 @@ import onnx import torch +from brevitas.export.onnx.standard.qcdq.manager import StdQCDQONNXManager from brevitas_examples.llm.llm_quant.export import brevitas_proxy_export_mode from parameterized import parameterized from testing_utils import SUPPORTED_MODELS_TINY, VALIDATE_EXPORT_ON_SHAPES, get_quantized_model -from brevitas.export.onnx.standard.qcdq.manager import StdQCDQONNXManager from optimum.amd.brevitas.export import find_and_insert_matmulinteger from optimum.exporters import TasksManager from optimum.exporters.onnx import ( diff --git a/tests/brevitas/test_quantization.py b/tests/brevitas/test_quantization.py index 91a81ec0..e37992f1 100644 --- a/tests/brevitas/test_quantization.py +++ b/tests/brevitas/test_quantization.py @@ -4,11 +4,10 @@ import unittest import torch -from parameterized import parameterized -from testing_utils import SUPPORTED_MODELS_TINY, get_quantized_model - from brevitas.nn.quant_linear import QuantLinear from brevitas.proxy.runtime_quant import ActQuantProxyFromInjector, DynamicActQuantProxyFromInjector +from parameterized import parameterized +from testing_utils import SUPPORTED_MODELS_TINY, get_quantized_model def _get_all_model_ids(model_type: str): From 41143d9bc416caa2c97df585fc802342929d7dbc Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Tue, 4 Jun 2024 20:55:37 +0530 Subject: [PATCH 19/22] fix token --- .github/workflows/test_ryzenai_nightly.yaml | 2 +- .github/workflows/test_ryzenai_quantization_timm.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test_ryzenai_nightly.yaml b/.github/workflows/test_ryzenai_nightly.yaml index f573d1a1..cf1eb81d 100644 --- a/.github/workflows/test_ryzenai_nightly.yaml +++ b/.github/workflows/test_ryzenai_nightly.yaml @@ -26,7 +26,7 @@ jobs: slow_test: true timeout_minutes: 1200 secrets: - hf_hub_read_token: ${{ secrets.HF_READ_TOKEN }} + hf_hub_read_token: ${{ secrets.HF_HUB_READ_TOKEN }} send_results: name: Send results to webhook runs-on: ubuntu-22.04 diff --git a/.github/workflows/test_ryzenai_quantization_timm.yaml b/.github/workflows/test_ryzenai_quantization_timm.yaml index bc313660..d8900331 100644 --- a/.github/workflows/test_ryzenai_quantization_timm.yaml +++ b/.github/workflows/test_ryzenai_quantization_timm.yaml @@ -27,4 +27,4 @@ jobs: with: test_file: "tests/ryzenai/test_quantization.py" secrets: - hf_hub_read_token: ${{ secrets.HF_READ_TOKEN }} + hf_hub_read_token: ${{ secrets.HF_HUB_READ_TOKEN }} From 409d43c504821c3eb9b9f64bca73896a767f8951 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 5 Jun 2024 15:15:34 +0530 Subject: [PATCH 20/22] addressed comments --- optimum/amd/ryzenai/configuration.py | 234 ++++++++++++--------------- optimum/amd/ryzenai/quantization.py | 15 +- tests/ryzenai/test_configuration.py | 36 ++--- 3 files changed, 122 insertions(+), 163 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 54c87a92..8e1382ea 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -12,6 +12,33 @@ from optimum.configuration_utils import BaseConfig +QUANT_TYPE_MAPPING = { + "uint8": vai_q_onnx.QuantType.QUInt8, + "int8": vai_q_onnx.QuantType.QInt8, + "uint16": vai_q_onnx.VitisQuantType.QUInt16, + "int16": vai_q_onnx.VitisQuantType.QInt16, + "uint32": vai_q_onnx.VitisQuantType.QUInt32, + "int32": vai_q_onnx.VitisQuantType.QInt32, + "float16": vai_q_onnx.VitisQuantType.QFloat16, + "bfloat16": vai_q_onnx.VitisQuantType.QBFloat16, +} + +QUANT_FORMAT_MAPPING = { + "qop": vai_q_onnx.QuantFormat.QOperator, + "qdq": vai_q_onnx.QuantFormat.QDQ, + "vitisqdq": vai_q_onnx.VitisQuantFormat.QDQ, + "vitisfixneuron": vai_q_onnx.VitisQuantFormat.FixNeuron, +} + +CALIBRATION_METHOD_MAPPING = { + "minmax": vai_q_onnx.CalibrationMethod.MinMax, + "entropy": vai_q_onnx.CalibrationMethod.Entropy, + "percentile": vai_q_onnx.CalibrationMethod.Percentile, + "nonoverflow": vai_q_onnx.PowerOfTwoMethod.NonOverflow, + "mse": vai_q_onnx.PowerOfTwoMethod.MinMSE, +} + + class CalibrationMethod(Enum): """CalibrationMethod is an enumeration of the calibration methods supported by RyzenAI quantization.""" @@ -22,28 +49,6 @@ class CalibrationMethod(Enum): MinMSE = vai_q_onnx.PowerOfTwoMethod.MinMSE -class QuantFormat(Enum): - """QuantFormat is an enumeration of the quantization formats supported by RyzenAI quantization.""" - - QOperator = vai_q_onnx.QuantFormat.QOperator - QDQ = vai_q_onnx.QuantFormat.QDQ - VitisQuantFormat_QDQ = vai_q_onnx.VitisQuantFormat.QDQ - VitisQuantFormat_FixNeuron = vai_q_onnx.VitisQuantFormat.FixNeuron - - -class QuantType(Enum): - """QuantType is an enumeration of the quantization types supported by RyzenAI quantization.""" - - QInt8 = vai_q_onnx.QuantType.QInt8 - QUInt8 = vai_q_onnx.QuantType.QUInt8 - QUInt16 = vai_q_onnx.VitisQuantType.QUInt16 - QInt16 = vai_q_onnx.VitisQuantType.QInt16 - QUInt32 = vai_q_onnx.VitisQuantType.QUInt32 - QInt32 = vai_q_onnx.VitisQuantType.QInt32 - QFloat16 = vai_q_onnx.VitisQuantType.QFloat16 - QBFloat16 = vai_q_onnx.VitisQuantType.QBFloat16 - - @dataclass class ExtraOptions: """ @@ -265,7 +270,7 @@ class QuantizationConfig: Args: is_static (`bool`): Whether to apply static quantization or dynamic quantization. - format (Union[QuantFormat, str], defaults to `QuantFormat.QDQ`): + format (`Union[QuantFormat, str]`, defaults to `QuantFormat.QDQ`): This parameter is used to specify the quantization format of the model. Options: - `QuantFormat.QOperator`: Quantizes the model directly using quantized operators. @@ -275,7 +280,7 @@ class QuantizationConfig: into the tensor. Supports a wider range of bit-widths and precisions. - `QuantFormat.FixNeuron` (Experimental): Quantizes the model by inserting FixNeuron (a combination of QuantizeLinear and DeQuantizeLinear) into the tensor. Experimental and not recommended for deployment. - calibration_method (Union[CalibrationMethod, str], defaults to `CalibrationMethod.MinMSE`): + calibration_method (`Union[CalibrationMethod, str]`, defaults to `CalibrationMethod.MinMSE`): The method used in calibration. Options (for CNNs running on NPU, power-of-two methods; for Transformers on NPU or CNNs on CPU, float scale methods): - `CalibrationMethod.NonOverflow`: Power-of-two method to prevent min/max values from overflowing. @@ -284,47 +289,47 @@ class QuantizationConfig: - `CalibrationMethod.MinMax`: Obtain quantization parameters based on minimum and maximum values of each tensor. - `CalibrationMethod.Entropy`: Determine quantization parameters based on the entropy algorithm of each tensor's distribution. - `CalibrationMethod.Percentile`: Calculate quantization parameters using percentiles of tensor values. - activations_dtype (QuantType, defaults to `QuantType.QUInt8`): + activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`): The quantization data type to use for the activations. - weights_dtype (QuantType, defaults to `QuantType.QInt8`): + weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): The quantization data type to use for the weights. - enable_ipu_cnn (bool, defaults to `True`): + enable_ipu_cnn (`bool`, defaults to `True`): Flag to generate a quantized model suitable for DPU/NPU computations. If True, the quantization process will consider specific limitations and requirements of DPU/NPU, optimizing the model accordingly. - input_nodes (List[str], defaults to an empty list `[]`): + input_nodes (`List[str]`, defaults to an empty list `[]`): List of names of starting nodes to be quantized. Nodes before these nodes will not be quantized. - output_nodes (List[str], defaults to an empty list `[]`): + output_nodes (`List[str]`, defaults to an empty list `[]`): List of names of end nodes to be quantized. Nodes after these nodes will not be quantized. - op_types_to_quantize (List[str], defaults to an empty list `[]`): + op_types_to_quantize (`List[str]`, defaults to an empty list `[]`): If specified, only operators of the given types will be quantized (e.g., ['Conv'] to quantize Convolutional layers). - random_data_reader_input_shape (Union[List[int], Tuple[int], Dict[str, List[int]]], defaults to an empty list `[]`): + random_data_reader_input_shape (`Union[List[int], Tuple[int], Dict[str, List[int]]]`, defaults to an empty list `[]`): Shapes of input nodes for internal random data reader. If dynamic axes require specific values, provide shapes. Format: list/tuple for single input, list of lists for multiple inputs, or dict {name: shape} for named inputs. - per_channel (bool, defaults to `False`): + per_channel (`bool`, defaults to `False`): Determines whether weights should be quantized per channel. Must be False for DPU/NPU devices. - reduce_range (bool, defaults to `False`): + reduce_range (`bool`, defaults to `False`): If True, quantizes weights with 7-bits. Must be False for DPU/NPU devices. - activations_dtype (QuantType, defaults to `QuantType.QInt8`): + activations_dtype (`QuantType`, defaults to `QuantType.QInt8`): Specifies the quantization data type for activations. - weights_dtype (QuantType, defaults to `QuantType.QInt8`): + weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): Specifies the quantization data type for weights. Must be `QuantType.QInt8` for NPU devices. - nodes_to_quantize (List[str], defaults to an empty list `[]`): + nodes_to_quantize (`List[str]`, defaults to an empty list `[]`): If specified, only the nodes in this list are quantized. - nodes_to_exclude (List[str], defaults to an empty list `[]`): + nodes_to_exclude (`List[str]`, defaults to an empty list `[]`): If specified, nodes in this list will be excluded from quantization. - optimize_model (bool, defaults to `True`): + optimize_model (`bool`, defaults to `True`): If True, optimizes the model before quantization. - use_external_data_format (bool, defaults to `False`): + use_external_data_format (`bool`, defaults to `False`): Flag for large size (>2GB) models. If True, model proto and data will be stored in separate files. - execution_providers (List[str], defaults to `['CPUExecutionProvider']`): + execution_providers (`List[str]`, defaults to `['CPUExecutionProvider']`): Defines the execution providers used by ONNX Runtime for model calibration. - convert_fp16_to_fp32 (bool, defaults to `False`): + convert_fp16_to_fp32 (`bool`, defaults to `False`): Controls whether to convert the input model from float16 to float32 before quantization. - convert_nchw_to_nhwc (bool, defaults to `False`): + convert_nchw_to_nhwc (`bool`, defaults to `False`): Controls whether to convert the input NCHW model to NHWC model before quantization. - include_cle (bool, defaults to `False`): + include_cle (`bool`, defaults to `False`): Flag to optimize models using CrossLayerEqualization; can improve accuracy for some models. - extra_options (Union[Dict, None, ExtraOptions], defaults to an instance of `ExtraOptions` with default values): + extra_options (`Union[Dict, None, ExtraOptions]`, defaults to an instance of `ExtraOptions` with default values): Contains key-value pairs for various options in different cases. """ @@ -353,27 +358,46 @@ def __post_init__(self): if isinstance(self.extra_options, dict): self.extra_options = ExtraOptions(**self.extra_options) - if self.calibration_method in {"mse", "overflow"}: + if self.calibration_method in {vai_q_onnx.PowerOfTwoMethod.NonOverflow, vai_q_onnx.PowerOfTwoMethod.MinMSE}: self.extra_options.calib_tensor_range_symmetric = True - self.check_dtype_and_format(self.activations_dtype, "activations_dtype", self.format) - self.check_dtype_and_format(self.weights_dtype, "weights_dtype", self.format) + if ( + self.activations_dtype not in {vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8} + and self.format != vai_q_onnx.VitisQuantFormat.QDQ + ): + raise ValueError( + f'activations_dtype is: "{self.activations_dtype.name.lower()}", format must be "vitisqdq".' + ) + if ( + self.weights_dtype not in {vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8} + and self.format != vai_q_onnx.VitisQuantFormat.QDQ + ): + raise ValueError(f'weights_dtype is: "{self.weights_dtype.name.lower()}", format must be "vitisqdq".') if self.enable_ipu_cnn: - if self.format not in ["qdq"]: - raise ValueError('ipu cnn configuration only support format "qdq".') - if self.calibration_method not in ["nonoverflow", "mse"]: - raise ValueError('ipu cnn configuration only support calibration_method "nonoverflow" and "mse".') + if self.format != vai_q_onnx.QuantFormat.QDQ: + raise ValueError(f'ipu cnn configuration only support format "qdq". Got {self.format}.') + + if self.calibration_method not in { + vai_q_onnx.PowerOfTwoMethod.NonOverflow, + vai_q_onnx.PowerOfTwoMethod.MinMSE, + }: + raise ValueError( + f'ipu cnn configuration only support calibration_method "nonoverflow" and "mse". Got {self.calibration_method.name.lower()}.' + ) + if not (self.extra_options.activation_symmetric and self.extra_options.weight_symmetric): raise ValueError( "ipu cnn configuration requires setting activation_symmetric and weight_symmetric to true." ) - if self.activations_dtype not in ["uint8", "int8"]: - raise ValueError('ipu cnn configuration only support activations_dtype "uint8" and "int8".') - if self.weights_dtype not in ["int8"]: - raise ValueError('ipu cnn configuration only support weights_dtype "int8".') + + if self.weights_dtype != vai_q_onnx.QuantType.QInt8: + raise ValueError( + f'ipu cnn configuration only support weights_dtype "int8". Got {self.weights_dtype.name.lower()}.' + ) + if self.per_channel: - raise ValueError("ipu cnn configuration only supports per tensor.") + raise ValueError("ipu cnn configuration only supports per tensor. Got per_channel=True.") def __setattr__(self, name, value): if name == "extra_options" and isinstance(value, dict): @@ -381,16 +405,25 @@ def __setattr__(self, name, value): else: super().__setattr__(name, value) - def __getattr__(self, name): - value = getattr(self, name) - if name == "format": - value = self._map_format(value) - elif name == "calibration_method": - value = self._map_calibration_method(value) - elif name in ["activations_dtype", "weights_dtype"]: - value = self._map_dtypes(value, name) + def __getattribute__(self, name: str): + value = super().__getattribute__(name) + if isinstance(value, str): + if name == "format": + value = QUANT_FORMAT_MAPPING[value] + elif name == "calibration_method": + value = CALIBRATION_METHOD_MAPPING[value] + elif name == "activations_dtype": + value = QUANT_TYPE_MAPPING[value] + elif name == "weights_dtype": + value = QUANT_TYPE_MAPPING[value] + return value + def to_dict(self): + options_dict = self.__dict__.copy() + options_dict["extra_options"] = options_dict["extra_options"].to_diff_dict() + return options_dict + def to_diff_dict(self) -> dict: """ Returns a dictionary of non-default values in the configuration. @@ -416,75 +449,6 @@ def to_diff_dict(self) -> dict: non_default_values[option.name] = value return non_default_values - @staticmethod - def check_dtype_and_format(dtype, dtype_name, format): - if dtype not in ["uint8", "int8"] and format not in ["vitisqdq"]: - raise ValueError(f'{dtype_name} is: "{dtype}", format must be "vitisqdq".') - - def map_format(self): - mapping = { - "qdq": QuantFormat.QDQ, - "qop": QuantFormat.QOperator, - "vitisqdq": QuantFormat.VitisQuantFormat_QDQ, - } - return QuantizationConfig._map_value(mapping, self.format, "format") - - def map_calibration_method(self): - mapping = { - "nonoverflow": CalibrationMethod.NonOverflow, - "mse": CalibrationMethod.MinMSE, - "minmax": CalibrationMethod.MinMax, - "entropy": CalibrationMethod.Entropy, - "percentile": CalibrationMethod.Percentile, - } - return QuantizationConfig._map_value(mapping, self.calibration_method, "calibration method") - - @property - def _dtype_mapping(self): - mapping = { - "uint8": QuantType.QUInt8, - "int8": QuantType.QInt8, - "uint16": QuantType.QUInt16, - "int16": QuantType.QInt16, - "uint32": QuantType.QUInt32, - "int32": QuantType.QInt32, - "float16": QuantType.QFloat16, - "bfloat16": QuantType.QBFloat16, - } - return mapping - - def map_activations_dtype(self): - return QuantizationConfig._map_value(self._dtype_mapping, self.activations_dtype, "Activations dtype") - - def map_weights_dtype(self): - return QuantizationConfig._map_value(self._dtype_mapping, self.weights_dtype, "Weights dtype") - - @staticmethod - def _map_value(mapping, value, name): - try: - return mapping[value] - except KeyError: - valid_values = ", ".join(f'"{v}"' for v in mapping.keys()) - raise ValueError(f'{name} only supports the following values: {valid_values}. Received "{value}".') - - @staticmethod - def quantization_type_str(activations_dtype, weights_dtype) -> str: - str_mapping = { - QuantType.QUInt8: "u8", - QuantType.QInt8: "s8", - QuantType.QUInt16: "u16", - QuantType.QInt16: "s16", - QuantType.QUInt32: "u32", - QuantType.QInt32: "s32", - QuantType.QFloat16: "f16", - QuantType.QBFloat16: "bf16", - } - activations_str = str_mapping.get(activations_dtype) - weights_str = str_mapping.get(weights_dtype) - if activations_str is None or weights_str is None: - raise ValueError("Unsupported quantization type") - return f"{activations_str}/{weights_str}" - @property def use_symmetric_calibration(self) -> bool: if self.extra_options: @@ -495,7 +459,7 @@ def use_symmetric_calibration(self) -> bool: def __str__(self): return ( f"{self.format} (" - f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, " + f"schema: {self.activations_dtype.name}/{self.weights_dtype.name}, " f"enable_ipu_cnn: {self.enable_ipu_cnn})" ) @@ -601,7 +565,7 @@ def __init__( ): super().__init__() self.opset = opset - self.quantization = quantization.to_diff_dict() if quantization is not None else None + self.quantization = quantization.to_dict() if quantization is not None else None self.optimum_version = kwargs.pop("optimum_version", None) @staticmethod diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 7be3c294..f8f2a7cf 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -164,25 +164,24 @@ def quantize( LOGGER.info("Quantizing model...") - format = quantization_config.map_format().value - calibration_method = quantization_config.map_calibration_method().value - activation_type = quantization_config.map_activations_dtype().value - weight_type = quantization_config.map_weights_dtype().value + quantization_config.map_calibration_method().value + quantization_config.map_activations_dtype().value + quantization_config.map_weights_dtype().value quantize_static( model_input=Path(self.onnx_model_path).as_posix(), model_output=quantized_model_path.as_posix(), calibration_data_reader=reader, - quant_format=format, - calibrate_method=calibration_method, + quant_format=quantization_config.format, + calibrate_method=quantization_config.calibration_method, input_nodes=quantization_config.input_nodes, output_nodes=quantization_config.output_nodes, op_types_to_quantize=quantization_config.op_types_to_quantize, random_data_reader_input_shape=quantization_config.random_data_reader_input_shape, per_channel=quantization_config.per_channel, reduce_range=quantization_config.reduce_range, - activation_type=activation_type, - weight_type=weight_type, + activation_type=quantization_config.activations_dtype, + weight_type=quantization_config.weights_dtype, nodes_to_quantize=quantization_config.nodes_to_quantize, nodes_to_exclude=quantization_config.nodes_to_exclude, optimize_model=quantization_config.optimize_model, diff --git a/tests/ryzenai/test_configuration.py b/tests/ryzenai/test_configuration.py index 015f049e..29382ceb 100644 --- a/tests/ryzenai/test_configuration.py +++ b/tests/ryzenai/test_configuration.py @@ -1,5 +1,6 @@ import unittest +import vai_q_onnx from parameterized import parameterized from optimum.amd.ryzenai import AutoQuantizationConfig, ExtraOptions, QuantizationConfig @@ -103,16 +104,16 @@ def test_parametric_setting_attributes(self, attribute, default_value, new_value class TestQuantizationConfig(unittest.TestCase): def test_default_values(self): config = QuantizationConfig() - self.assertEqual(config.format, "qdq") - self.assertEqual(config.calibration_method, "mse") + self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ) + self.assertEqual(config.calibration_method, vai_q_onnx.PowerOfTwoMethod.MinMSE) self.assertEqual(config.input_nodes, []) self.assertEqual(config.output_nodes, []) self.assertEqual(config.op_types_to_quantize, []) self.assertEqual(config.random_data_reader_input_shape, []) self.assertFalse(config.per_channel) self.assertFalse(config.reduce_range) - self.assertEqual(config.activations_dtype, "uint8") - self.assertEqual(config.weights_dtype, "int8") + self.assertEqual(config.activations_dtype, vai_q_onnx.QuantType.QUInt8) + self.assertEqual(config.weights_dtype, vai_q_onnx.QuantType.QInt8) self.assertEqual(config.nodes_to_quantize, []) self.assertEqual(config.nodes_to_exclude, []) self.assertTrue(config.optimize_model) @@ -129,22 +130,16 @@ def test_extra_options_initialization(self): config = QuantizationConfig(extra_options=extra_options) self.assertEqual(config.extra_options.activation_symmetric, True) - def test_to_diff_dict(self): - config = QuantizationConfig(format="qop", calibration_method="entropy") - diff_dict = config.to_diff_dict() - self.assertEqual(diff_dict["format"], "qop") - self.assertEqual(diff_dict["calibration_method"], "entropy") - def test_use_symmetric_calibration(self): config = QuantizationConfig(extra_options=ExtraOptions(activation_symmetric=True, weight_symmetric=True)) self.assertTrue(config.use_symmetric_calibration) @parameterized.expand( [ - ("format", "qdq", "qop"), - ("calibration_method", "mse", "entropy"), - ("activations_dtype", "uint8", "int8"), - ("weights_dtype", "int8", "uint8"), + ("format", vai_q_onnx.QuantFormat.QDQ, vai_q_onnx.QuantFormat.QOperator), + ("calibration_method", vai_q_onnx.PowerOfTwoMethod.MinMSE, vai_q_onnx.CalibrationMethod.Entropy), + ("activations_dtype", vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8), + ("weights_dtype", vai_q_onnx.QuantType.QInt8, vai_q_onnx.QuantType.QUInt8), ] ) def test_parametric_setting_attributes(self, attribute, default_value, new_value): @@ -157,17 +152,18 @@ def test_parametric_setting_attributes(self, attribute, default_value, new_value class TestAutoQuantizationConfig(unittest.TestCase): def test_ipu_cnn_config(self): config = AutoQuantizationConfig.ipu_cnn_config() - self.assertEqual(config.format, "qdq") - self.assertEqual(config.calibration_method, "mse") + self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ) + self.assertEqual(config.calibration_method, vai_q_onnx.PowerOfTwoMethod.MinMSE) self.assertTrue(config.extra_options.activation_symmetric) def test_ipu_transformer_config(self): config = AutoQuantizationConfig.ipu_transformer_config() - self.assertEqual(config.format, "qdq") - self.assertEqual(config.calibration_method, "minmax") + self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ) + self.assertEqual(config.calibration_method, vai_q_onnx.CalibrationMethod.MinMax) self.assertTrue(config.extra_options.activation_symmetric) def test_cpu_cnn_config(self): config = AutoQuantizationConfig.cpu_cnn_config() - self.assertEqual(config.format, "qdq") - self.assertEqual(config.calibration_method, "minmax") + + self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ) + self.assertEqual(config.calibration_method, vai_q_onnx.CalibrationMethod.MinMax) From c3e25543cadde955f9c50a09487843374a5e0973 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 5 Jun 2024 15:23:01 +0530 Subject: [PATCH 21/22] addressed comments --- optimum/amd/ryzenai/quantization.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index f8f2a7cf..92aadaf5 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -164,10 +164,6 @@ def quantize( LOGGER.info("Quantizing model...") - quantization_config.map_calibration_method().value - quantization_config.map_activations_dtype().value - quantization_config.map_weights_dtype().value - quantize_static( model_input=Path(self.onnx_model_path).as_posix(), model_output=quantized_model_path.as_posix(), From e67ed2f2d237f72b9b124090d77f1b77312748f6 Mon Sep 17 00:00:00 2001 From: Mohit Sharma Date: Wed, 5 Jun 2024 15:27:50 +0530 Subject: [PATCH 22/22] fix docstring --- optimum/amd/ryzenai/configuration.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index 8e1382ea..51efca39 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -289,10 +289,6 @@ class QuantizationConfig: - `CalibrationMethod.MinMax`: Obtain quantization parameters based on minimum and maximum values of each tensor. - `CalibrationMethod.Entropy`: Determine quantization parameters based on the entropy algorithm of each tensor's distribution. - `CalibrationMethod.Percentile`: Calculate quantization parameters using percentiles of tensor values. - activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`): - The quantization data type to use for the activations. - weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): - The quantization data type to use for the weights. enable_ipu_cnn (`bool`, defaults to `True`): Flag to generate a quantized model suitable for DPU/NPU computations. If True, the quantization process will consider specific limitations and requirements of DPU/NPU, optimizing the model accordingly. @@ -309,9 +305,9 @@ class QuantizationConfig: Determines whether weights should be quantized per channel. Must be False for DPU/NPU devices. reduce_range (`bool`, defaults to `False`): If True, quantizes weights with 7-bits. Must be False for DPU/NPU devices. - activations_dtype (`QuantType`, defaults to `QuantType.QInt8`): + activations_dtype (`QuantType`, defaults to `quint8`): Specifies the quantization data type for activations. - weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): + weights_dtype (`QuantType`, defaults to `qint8`): Specifies the quantization data type for weights. Must be `QuantType.QInt8` for NPU devices. nodes_to_quantize (`List[str]`, defaults to an empty list `[]`): If specified, only the nodes in this list are quantized.