diff --git a/docs/Dockerfile b/docs/Dockerfile index 95f42141..01d17003 100644 --- a/docs/Dockerfile +++ b/docs/Dockerfile @@ -24,8 +24,8 @@ RUN npm install npm@9.8.1 -g && \ RUN python3 -m pip install --no-cache-dir --upgrade pip RUN python3 -m pip install --no-cache-dir git+https://github.com/huggingface/doc-builder.git -RUN git clone --depth 1 --branch v3.5 https://github.com/Xilinx/Vitis-AI.git && cd Vitis-AI/src/vai_quantizer/vai_q_onnx && sh build.sh && pip install pkgs/*.whl +RUN git clone --depth 1 --branch feature/onnx https://gitenterprise.xilinx.com/AMDNeuralOpt/Quark.git && cd Quark && python3 setup.py sdist bdist_wheel -d pkgs && pip install pkgs/*.whl RUN git clone $clone_url && cd optimum-amd && git checkout $commit_sha RUN python3 -m pip install --no-cache-dir ./optimum-amd[brevitas,tests] -RUN pip install onnxruntime==1.14.0 +RUN pip install onnxruntime==1.17.0 diff --git a/docs/source/ryzenai/package_reference/quantization.mdx b/docs/source/ryzenai/package_reference/quantization.mdx index 8dcd8b45..bfce01bc 100644 --- a/docs/source/ryzenai/package_reference/quantization.mdx +++ b/docs/source/ryzenai/package_reference/quantization.mdx @@ -2,9 +2,9 @@ Licensed under the MIT License. --> -# Quantization for Ryzen AI IPU +# Quantization for Ryzen AI NPU -Please refer to the guide [How to apply quantization](https://huggingface.co/docs/optimum/amd/ryzenai/usage_guides/quantization) to understand how to use the following classes to quantize models targeting Ryzen AI IPU. +Please refer to the guide [How to apply quantization](https://huggingface.co/docs/optimum/amd/ryzenai/usage_guides/quantization) to understand how to use the following classes to quantize models targeting Ryzen AI NPU. 
## Using Vitis AI Quantizer diff --git a/docs/source/ryzenai/usage_guides/quantization.mdx b/docs/source/ryzenai/usage_guides/quantization.mdx index af009917..d270a960 100644 --- a/docs/source/ryzenai/usage_guides/quantization.mdx +++ b/docs/source/ryzenai/usage_guides/quantization.mdx @@ -4,7 +4,7 @@ Licensed under the MIT License. # Quantization for Ryzen AI -Ryzen AI IPU best performances are achieved using [quantized models](https://huggingface.co/docs/optimum/concept_guides/quantization). There are two different ways to quantize models for Ryzen AI IPU: +The best performance on the Ryzen AI NPU is achieved using [quantized models](https://huggingface.co/docs/optimum/concept_guides/quantization). There are two different ways to quantize models for the Ryzen AI NPU: * through [Vitis AI Quantizer](https://ryzenai.docs.amd.com/en/latest/vai_quant/vai_q_onnx.html), used in Optimum's [`~ryzenai.RyzenAIOnnxQuantizer`], which is designed for ONNX model quantization. Currently supports quantising [timm](https://github.com/huggingface/pytorch-image-models) models using dynamic and static quantization methods. * through [Brevitas](https://github.com/Xilinx/brevitas) library, used in Optimum's [`~brevitas.BrevitasQuantizer`]. Brevitas allows to quantize directly PyTorch models, which may be optionally exported to ONNX. This is recommended to quantize other models. @@ -14,7 +14,7 @@ Ryzen AI IPU best performances are achieved using [quantized models](https://hug RyzenAI Quantizer provides an easy-to-use Post Training Quantization (PTQ) flow on the pre-trained model saved in the ONNX format. It generates a quantized ONNX model ready to be deployed with the Ryzen AI. -The Quantizer supports various configuration and functions to quantize models targeting for deployment on IPU_CNN, IPU_Transformer and CPU. +The Quantizer supports various configurations and functions to quantize models targeted for deployment on NPU_CNN, NPU_Transformer and CPU. 
The [`~ryzenai.RyzenAIOnnxQuantizer`] can be initialized using the `from_pretrained` method, either from a local model folder or a model hosted on Hugging Face Hub: @@ -28,7 +28,7 @@ Below you will find an easy end-to-end example on how to quantize a VGG model fr * To begin, export the VGG model to ONNX using [Optimum Exporters](https://huggingface.co/docs/optimum/main/en/exporters/onnx/overview). Ensure static shapes are specified for inference. * Create a preprocessing function to handle specific image format conversions and apply necessary transformations to prepare the input for the model. -* Initialize the RyzenAI quantizer (RyzenAIOnnxQuantizer) and configure the quantization settings using AutoQuantizationConfig. The recommended quantization configuration for CNN models to be deployed on the IPU is loaded using `ipu_cnn_config`. +* Initialize the RyzenAI quantizer (RyzenAIOnnxQuantizer) and configure the quantization settings using AutoQuantizationConfig. The recommended quantization configuration for CNN models to be deployed on the NPU is loaded using `npu_cnn_config`. * Obtain a calibration dataset using the quantizer's `get_calibration_dataset` method. This dataset is crucial for computing quantization parameters during the quantization process. * Run the quantizer with the specified quantization configuration and calibration data. The quantization parameters computed during this process are embedded as constants in the quantized model. * The resulting quantized model is saved in the specified quantization directory. 
@@ -76,7 +76,7 @@ Below you will find an easy end-to-end example on how to quantize a VGG model fr >>> quantizer = RyzenAIOnnxQuantizer.from_pretrained(export_dir) >>> # Step 4: Load recommended quantization config for model ->>> quantization_config = AutoQuantizationConfig.ipu_cnn_config() +>>> quantization_config = AutoQuantizationConfig.npu_cnn_config() >>> # Step 5: Obtain a calibration dataset for computing quantization parameters >>> train_calibration_dataset = quantizer.get_calibration_dataset( diff --git a/examples/quantization/ryzenai/README.md b/examples/quantization/ryzenai/README.md index 8e89cafc..16cf6f01 100644 --- a/examples/quantization/ryzenai/README.md +++ b/examples/quantization/ryzenai/README.md @@ -7,10 +7,10 @@ The quantization process is abstracted via the AutoQuantizationConfig and the Ry You can read the [Vitis AI Quantizer for ONNX](https://ryzenai.docs.amd.com/en/latest/vai_quant/vai_q_onnx.html) to learn about VAI_Q_ONNX quantization. ### Creating an AutoQuantizationConfig -The AutoQuantizationConfig class is used to specify how quantization should be done. The class can be initialized using the ipu_cnn_config() method. +The AutoQuantizationConfig class is used to specify how quantization should be done. The class can be initialized using the npu_cnn_config() method. 
```python from optimum.amd.ryzenai import AutoQuantizationConfig -quantization_config = AutoQuantizationConfig.ipu_cnn_config() +quantization_config = AutoQuantizationConfig.npu_cnn_config() ``` diff --git a/examples/quantization/ryzenai/quantize_image_classification_model.py b/examples/quantization/ryzenai/quantize_image_classification_model.py index 87015961..a55550ed 100644 --- a/examples/quantization/ryzenai/quantize_image_classification_model.py +++ b/examples/quantization/ryzenai/quantize_image_classification_model.py @@ -43,7 +43,7 @@ def preprocess_fn(ex, transforms): # quantize quantizer = RyzenAIOnnxQuantizer.from_pretrained(onnx_model) - quantization_config = AutoQuantizationConfig.ipu_cnn_config() + quantization_config = AutoQuantizationConfig.npu_cnn_config() calibration_dataset = quantizer.get_calibration_dataset( args.dataset, diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py index c043d473..3916ceed 100644 --- a/optimum/amd/ryzenai/configuration.py +++ b/optimum/amd/ryzenai/configuration.py @@ -2,100 +2,50 @@ # Licensed under the MIT License. """Configuration classes for quantization with RyzenAI.""" -from dataclasses import asdict, dataclass +from dataclasses import asdict from enum import Enum from typing import Optional -import vai_q_onnx -from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType +from onnxruntime.quantization import CalibrationMethod, QuantType +from quark.onnx.calibrate import PowerOfTwoMethod +from quark.onnx.quantization.config.config import QuantizationConfig from optimum.configuration_utils import BaseConfig -@dataclass -class QuantizationConfig: - """ - QuantizationConfig is the configuration class handling all the RyzenAI quantization parameters. - - Args: - is_static (`bool`): - Whether to apply static quantization or dynamic quantization. - format (`QuantFormat`): - Targeted RyzenAI quantization representation format. 
- For the Operator Oriented (QOperator) format, all the quantized operators have their own ONNX definitions. - For the Tensor Oriented (QDQ) format, the model is quantized by inserting QuantizeLinear / DeQuantizeLinear - operators. - calibration_method (`CalibrationMethod`): - The method chosen to calculate the activations quantization parameters using the calibration dataset. - activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`): - The quantization data types to use for the activations. - activations_symmetric (`bool`, defaults to `False`): - Whether to apply symmetric quantization on the activations. - weights_dtype (`QuantType`, defaults to `QuantType.QInt8`): - The quantization data types to use for the weights. - weights_symmetric (`bool`, defaults to `True`): - Whether to apply symmetric quantization on the weights. - enable_dpu (`bool`, defaults to `True`): - Determines whether to generate a quantized model that is suitable for the DPU. If set to True, the quantization - process will create a model that is optimized for DPU computations. 
- - """ - - format: QuantFormat = QuantFormat.QDQ - calibration_method: CalibrationMethod = vai_q_onnx.PowerOfTwoMethod.MinMSE - activations_dtype: QuantType = QuantType.QUInt8 - activations_symmetric: bool = True - weights_dtype: QuantType = QuantType.QInt8 - weights_symmetric: bool = True - enable_dpu: bool = True - +class AutoQuantizationConfig: @staticmethod - def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str: - return ( - f"{'s8' if activations_dtype == QuantType.QInt8 else 'u8'}" - f"/" - f"{'s8' if weights_dtype == QuantType.QInt8 else 'u8'}" - ) - - @property - def use_symmetric_calibration(self) -> bool: - return self.activations_symmetric and self.weights_symmetric - - def __str__(self): - return ( - f"{self.format} (" - f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, " - f"enable_dpu: {self.enable_dpu})" + def npu_cnn_config(): + return QuantizationConfig( + calibrate_method=PowerOfTwoMethod.MinMSE, + activation_type=QuantType.QUInt8, + weight_type=QuantType.QInt8, + enable_npu_cnn=True, + extra_options={"ActivationSymmetric": True}, ) - -class AutoQuantizationConfig: @staticmethod - def ipu_cnn_config(): + def npu_transformer_config(): return QuantizationConfig( - format=QuantFormat.QDQ, - calibration_method=vai_q_onnx.PowerOfTwoMethod.MinMSE, - activations_dtype=QuantType.QUInt8, - activations_symmetric=True, - weights_dtype=QuantType.QInt8, - weights_symmetric=True, - enable_dpu=True, + calibrate_method=CalibrationMethod.MinMax, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + enable_npu_transformer=True, ) @staticmethod def cpu_cnn_config( - use_symmetric_activations: bool = False, - use_symmetric_weights: bool = True, - enable_dpu: bool = False, + include_cle: bool = True, + include_fast_ft: bool = True, + extra_options: dict = None, ): return QuantizationConfig( - format=QuantFormat.QDQ, - 
calibration_method=vai_q_onnx.CalibrationMethod.MinMax, - activations_dtype=QuantType.QUInt8, - activations_symmetric=use_symmetric_activations, - weights_dtype=QuantType.QInt8, - weights_symmetric=use_symmetric_weights, - enable_dpu=enable_dpu, + calibrate_method=CalibrationMethod.Percentile, + activation_type=QuantType.QInt8, + weight_type=QuantType.QInt8, + include_cle=include_cle, + include_fast_ft=include_fast_ft, + extra_options=extra_options, ) diff --git a/optimum/amd/ryzenai/pipelines/__init__.py b/optimum/amd/ryzenai/pipelines/__init__.py index 7b9c0051..0780748f 100644 --- a/optimum/amd/ryzenai/pipelines/__init__.py +++ b/optimum/amd/ryzenai/pipelines/__init__.py @@ -128,7 +128,7 @@ def pipeline( The model that will be used by the pipeline to make predictions. This can be a model identifier or an actual instance of a pretrained model. If not provided, the default model for the specified task will be loaded. vaip_config (`Optional[str]`, defaults to `None`): - Runtime configuration file for inference with Ryzen IPU. A default config file can be found in the Ryzen AI VOE package, + Runtime configuration file for inference with Ryzen NPU. A default config file can be found in the Ryzen AI VOE package, extracted during installation under the name `vaip_config.json`. 
model_type (`Optional[str]`, defaults to `None`): Model type for the model diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py index 28fc5621..b422a85a 100644 --- a/optimum/amd/ryzenai/quantization.py +++ b/optimum/amd/ryzenai/quantization.py @@ -12,12 +12,13 @@ import onnx from datasets import Dataset, load_dataset from onnxruntime.quantization import CalibrationDataReader -from vai_q_onnx import quantize_static +from quark.onnx import ModelQuantizer +from quark.onnx.quantization.config.config import Config, QuantizationConfig from optimum.quantization_base import OptimumQuantizer from transformers import PretrainedConfig -from .configuration import QuantizationConfig, RyzenAIConfig +from .configuration import RyzenAIConfig from .modeling import RyzenAIModel @@ -161,22 +162,11 @@ def quantize( suffix = f"_{file_suffix}" if file_suffix else "" quantized_model_path = save_dir.joinpath(f"{self.onnx_model_path.stem}{suffix}").with_suffix(".onnx") - LOGGER.info("Quantizing model...") - quantize_static( - model_input=Path(self.onnx_model_path).as_posix(), - model_output=quantized_model_path.as_posix(), - calibration_data_reader=reader, - quant_format=quantization_config.format, - calibrate_method=quantization_config.calibration_method, - weight_type=quantization_config.weights_dtype, - activation_type=quantization_config.activations_dtype, - enable_dpu=quantization_config.enable_dpu, - extra_options={ - "WeightSymmetric": quantization_config.weights_symmetric, - "ActivationSymmetric": quantization_config.activations_symmetric, - }, - ) + + quant_config = Config(global_quant_config=quantization_config) + quantizer = ModelQuantizer(quant_config) + quantizer.quantize_model(Path(self.onnx_model_path).as_posix(), quantized_model_path.as_posix(), reader) LOGGER.info(f"Saved quantized model at: {save_dir}") diff --git a/setup.py b/setup.py index f5dbf85c..73a40a19 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ "Programming Language :: 
Python :: 3.11", "Topic :: Scientific/Engineering :: Artificial Intelligence", ], - keywords="transformers, amd, ryzen, ipu, quantization, on-device, instinct", + keywords="transformers, amd, ryzen, npu, quantization, on-device, instinct", url="https://github.com/huggingface/optimum-amd", author="HuggingFace Inc. Special Ops Team", author_email="hardware@huggingface.co", diff --git a/tests/ryzenai/1x4.xclbin b/tests/ryzenai/1x4.xclbin new file mode 100644 index 00000000..eb63d209 Binary files /dev/null and b/tests/ryzenai/1x4.xclbin differ diff --git a/tests/ryzenai/4x4.xclbin b/tests/ryzenai/4x4.xclbin new file mode 100644 index 00000000..3b7d0741 Binary files /dev/null and b/tests/ryzenai/4x4.xclbin differ diff --git a/tests/ryzenai/test_modeling.py b/tests/ryzenai/test_modeling.py index 3a94bc1b..fe568d1e 100644 --- a/tests/ryzenai/test_modeling.py +++ b/tests/ryzenai/test_modeling.py @@ -97,14 +97,14 @@ def test_model(self, model_id): file_name, ort_input, input_name = load_model_and_input(model_id) - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( model_id, RyzenAIModelForImageClassification, ort_input, cache_dir, cache_key, file_name ) - self.assertIn("logits", outputs_ipu) + self.assertIn("logits", outputs_npu) self.assertIn("logits", outputs_cpu) - self.assertTrue(np.allclose(outputs_ipu.logits, outputs_cpu.logits, atol=1e-4)) + self.assertTrue(np.allclose(outputs_npu.logits, outputs_cpu.logits, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) @@ -147,12 +147,12 @@ def test_model(self, model_id): file_name, ort_input, input_name = load_model_and_input(model_id) - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( model_id, RyzenAIModelForObjectDetection, ort_input, cache_dir, cache_key, file_name ) - for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): - 
self.assertTrue(np.allclose(output_ipu, output_cpu, atol=1e-4)) + for output_npu, output_cpu in zip(outputs_npu.values(), outputs_cpu.values()): + self.assertTrue(np.allclose(output_npu, output_cpu, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) @@ -212,12 +212,12 @@ def test_model(self, model_id): file_name, ort_input, input_name = load_model_and_input(model_id) - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( model_id, RyzenAIModelForSemanticSegmentation, ort_input, cache_dir, cache_key, file_name ) - for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): - self.assertTrue(np.allclose(output_ipu, output_cpu, atol=1e-4)) + for output_npu, output_cpu in zip(outputs_npu.values(), outputs_cpu.values()): + self.assertTrue(np.allclose(output_npu, output_cpu, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) @@ -237,12 +237,12 @@ def test_model(self, model_id): file_name, ort_input, input_name = load_model_and_input(model_id) - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( model_id, RyzenAIModelForImageToImage, ort_input, cache_dir, cache_key, file_name ) - for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): - self.assertTrue(np.allclose(output_ipu, output_cpu, atol=1e-4)) + for output_npu, output_cpu in zip(outputs_npu.values(), outputs_cpu.values()): + self.assertTrue(np.allclose(output_npu, output_cpu, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) @@ -263,12 +263,12 @@ def test_model(self, model_id): file_name, ort_input, input_name = load_model_and_input(model_id) ort_input = {input_name: ort_input} - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( model_id, 
RyzenAIModelForCustomTasks, ort_input, cache_dir, cache_key, file_name ) - for output_ipu, output_cpu in zip(outputs_ipu.values(), outputs_cpu.values()): - self.assertTrue(np.allclose(output_ipu, output_cpu, atol=1e-4)) + for output_npu, output_cpu in zip(outputs_npu.values(), outputs_cpu.values()): + self.assertTrue(np.allclose(output_npu, output_cpu, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) diff --git a/tests/ryzenai/test_quantization.py b/tests/ryzenai/test_quantization.py index 743f0858..fdbd593f 100644 --- a/tests/ryzenai/test_quantization.py +++ b/tests/ryzenai/test_quantization.py @@ -94,7 +94,7 @@ def preprocess_fn(ex, transforms): # quantize model quantizer = RyzenAIOnnxQuantizer.from_pretrained(export_dir.name) - quantization_config = AutoQuantizationConfig.ipu_cnn_config() + quantization_config = AutoQuantizationConfig.npu_cnn_config() train_calibration_dataset = quantizer.get_calibration_dataset( "imagenet-1k", @@ -116,11 +116,11 @@ def preprocess_fn(ex, transforms): evaluation_set = load_dataset(dataset_name, split="validation", streaming=True, trust_remote_code=True) ort_inputs = preprocess_fn(next(iter(evaluation_set)), transforms)["pixel_values"].unsqueeze(0) - outputs_ipu, outputs_cpu = self.prepare_outputs( + outputs_npu, outputs_cpu = self.prepare_outputs( quantization_dir.name, RyzenAIModelForImageClassification, ort_inputs, cache_dir, cache_key ) - self.assertTrue(torch.allclose(outputs_ipu.logits, outputs_cpu.logits, atol=1e-4)) + self.assertTrue(torch.allclose(outputs_npu.logits, outputs_cpu.logits, atol=1e-4)) current_ops = self.get_ops(cache_dir, cache_key) baseline_ops = self.get_baseline_ops(cache_key) diff --git a/tests/ryzenai/test_timm_quant_and_eval.py b/tests/ryzenai/test_timm_quant_and_eval.py new file mode 100644 index 00000000..42d9763e --- /dev/null +++ b/tests/ryzenai/test_timm_quant_and_eval.py @@ -0,0 +1,292 @@ +# +# Copyright (C) 2023, Advanced Micro Devices, 
Inc. All rights reserved. +# SPDX-License-Identifier: MIT +# +import os +import shutil +import tarfile +import time +from argparse import ArgumentParser + +import numpy as np +import onnxruntime +import onnx +import timm +import torch +from datasets import Dataset +from timm.data import create_dataset, create_loader +from timm.models import create_model +from timm.utils import AverageMeter +from tqdm import tqdm + +from optimum.amd.ryzenai import ( + AutoQuantizationConfig, + RyzenAIModelForImageClassification, + RyzenAIOnnxQuantizer, +) + + +""" +If you already have an ImageNet dataset, you can directly use your dataset path with '--calib-data-path' and '--eval-data-path'. + +To prepare the test data, please check the download section of the main website: +https://huggingface.co/datasets/imagenet-1k/tree/main/data. +You need to register and download **val_images.tar.gz**. + +For example: +python test_timm_quant_and_eval.py -c $PATH/calib_100 -e $PATH/val_data -m timm/botnet26t_256.c1_in1k +or +python test_timm_quant_and_eval.py -v $PATH/val_images.tar.gz -m timm/botnet26t_256.c1_in1k + +Float Accuracy of timm/botnet26t_256.c1_in1k: +- Prec@1: 79.258% +- Prec@5: 94.538% + +Quantization Accuracy of timm/botnet26t_256.c1_in1k: +- Prec@1: 79.036% +- Prec@5: 94.408% +""" + + +def parse_args(): + parser = ArgumentParser("RyzenAIQuantization") + parser.add_argument("-v", "--val-path", metavar="DIR", required=False, help="path to dataset") + parser.add_argument("-c", "--calib-data-path", metavar="DIR", required=False, help="path to dataset") + parser.add_argument("-e", "--eval-data-path", metavar="DIR", required=False, help="path to dataset") + parser.add_argument( + "-m", + "--model_id", + type=str, + default="timm/botnet26t_256.c1_in1k", + help='Model id, default to "timm/botnet26t_256.c1_in1k"', + ) + parser.add_argument( + "--dataset", type=str, default="imagenet-1k", help='Calibration dataset, default to "imagenet-1k"' + ) + parser.add_argument( + 
"--onnx-output-opt", default="", type=str, metavar="PATH", help="path to output optimized onnx graph" + ) + parser.add_argument("--profile", action="store_true", default=False, help="Enable profiler output.") + parser.add_argument( + "-j", "--workers", default=2, type=int, metavar="N", help="number of data loading workers (default: 2)" + ) + parser.add_argument("-b", "--batch-size", default=1, type=int, metavar="N", help="mini-batch size (default: 1)") + + # execution provider options + parser.add_argument('--ep', type=str, default ='cpu',choices = ['cpu','npu'], help='EP backend selection') + + args, _ = parser.parse_known_args() + if args.val_path is None and (args.calib_data_path is None or args.eval_data_path is None): + parser.error("You must either provide --calib-data-path and --eval-data-path, or --val-path") + + return args + + +def main(args): + torch.multiprocessing.set_sharing_strategy("file_system") + # prepare val data and calib data + if (args.calib_data_path is None or args.eval_data_path is None) and args.val_path is not None: + source_folder = "val_data" + calib_data_path = "calib_data" + if os.path.isdir(source_folder) and os.path.isdir(calib_data_path): + print( + f"Detected that {source_folder} and {calib_data_path} already exist, skipping the creation of the calibration dataset." 
+ ) + else: + os.makedirs(source_folder, exist_ok=True) + with tarfile.open(args.val_path, "r:gz") as tar: + tar.extractall(path=source_folder) + + if not os.path.exists(source_folder): + raise ValueError("The val_data does not exist.") + files = os.listdir(source_folder) + for filename in files: + if not filename.startswith("ILSVRC2012_val_") or not filename.endswith(".JPEG"): + continue + + n_identifier = filename.split("_")[-1].split(".")[0] + folder_name = n_identifier + folder_path = os.path.join(source_folder, folder_name) + if not os.path.exists(folder_path): + os.makedirs(folder_path) + file_path = os.path.join(source_folder, filename) + destination = os.path.join(folder_path, filename) + shutil.move(file_path, destination) + + print("File organization complete.") + + if not os.path.exists(calib_data_path): + os.makedirs(calib_data_path) + + destination_folder = calib_data_path + + subfolders = os.listdir(source_folder) + cnt = 0 + for subfolder in subfolders: + source_subfolder = os.path.join(source_folder, subfolder) + destination_subfolder = os.path.join(destination_folder, subfolder) + os.makedirs(destination_subfolder, exist_ok=True) + + files = os.listdir(source_subfolder) + + if files: + file_to_copy = files[0] + source_file = os.path.join(source_subfolder, file_to_copy) + destination_file = os.path.join(destination_subfolder, file_to_copy) + + shutil.copy(source_file, destination_file) + cnt += 1 + if cnt >= 200: + break + + print("Creating calibration dataset complete.") + args.calib_data_path = calib_data_path + args.eval_data_path = source_folder + + model_id = args.model_id + + onnx_model = RyzenAIModelForImageClassification.from_pretrained( + model_id, export=True, provider="CPUExecutionProvider" + ) + # # preprocess config + model = create_model(model_id, pretrained=False) + data_config = timm.data.resolve_data_config(model=model) + + # # quantize + quantizer = RyzenAIOnnxQuantizer.from_pretrained(onnx_model) + # determine whether to use npu 
config + if args.ep == "npu": + quantization_config = AutoQuantizationConfig.npu_cnn_config() + else: + quantization_config = AutoQuantizationConfig.cpu_cnn_config() + quantization_config.include_fast_ft = True + # determine if cuda is available + if torch.cuda.is_available(): + device = "cuda:0" + else: + device = "cpu" + quantization_config.extra_options = { + "FastFinetune": { + "BatchSize": 2, + "FixedSeed": 1705472343, + "NumBatches": 1, + "NumIterations": 2000, + "LearningRate": 0.1, + "OptimAlgorithm": "adaround", + "OptimDevice": device, # "cuda:0" or 'cpu' + "LRAdjust": (), + "SelectiveUpdate": False, + "EarlyStop": True, + "DropRatio": 0.75, + "RegParam": 0.01, # default + "BetaRange": (20, 2), # default + "WarmStart": 0.2, # default + }, + "Percentile": 99.9999, + } + + calib_loader = create_loader( + create_dataset("", args.calib_data_path), + input_size=data_config["input_size"], + batch_size=args.batch_size, + use_prefetcher=False, + interpolation=data_config["interpolation"], + mean=data_config["mean"], + std=data_config["std"], + num_workers=args.workers, + crop_pct=data_config["crop_pct"], + ) + + data_list = [] + + for batch in calib_loader: + data = batch[0] + data_list.append(data) + + data_list = torch.cat(data_list, dim=0) + + data_np = data_list.numpy() + + data_dict = { + "pixel_values": data_np, + } + + calibration_dataset = Dataset.from_dict(data_dict) + + quantizer.quantize( + quantization_config=quantization_config, dataset=calibration_dataset, save_dir="quantized_model" + ) + + # Set graph optimization level + sess_options = onnxruntime.SessionOptions() + sess_options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_ENABLE_ALL + if args.profile: + sess_options.enable_profiling = True + if args.onnx_output_opt: + sess_options.optimized_model_filepath = args.onnx_output_opt + + if args.ep == "npu": + print("Run evaluation on NPU") + # clear modelcachekey if it exist + if os.path.exists("modelcachekey") and 
os.path.isdir("modelcachekey"): + shutil.rmtree("modelcachekey") + + from pathlib import Path + providers = ['VitisAIExecutionProvider'] + cache_dir = Path(__file__).parent.resolve() + provider_options = [{ + 'config_file': 'vaip_config.json', + 'cacheDir': str(cache_dir), + 'cacheKey': 'modelcachekey' + }] + model = onnx.load("quantized_model/model_quantized.onnx") + session = onnxruntime.InferenceSession(model.SerializeToString(), sess_options, providers=providers, + provider_options=provider_options) + else: + session = onnxruntime.InferenceSession("quantized_model/model_quantized.onnx", sess_options) + + loader = create_loader( + create_dataset("", args.eval_data_path), + input_size=data_config["input_size"], + batch_size=args.batch_size, + use_prefetcher=False, + interpolation=data_config["interpolation"], + mean=data_config["mean"], + std=data_config["std"], + num_workers=args.workers, + crop_pct=data_config["crop_pct"], + ) + + input_name = session.get_inputs()[0].name + + batch_time = AverageMeter() + top1 = AverageMeter() + top5 = AverageMeter() + end = time.time() + for input, target in tqdm(loader, desc="Processing"): + # run the net and return prediction + output = session.run([], {input_name: input.data.numpy()}) + output = output[0] + + # measure accuracy and record loss + prec1, prec5 = accuracy_np(output, target.numpy()) + top1.update(prec1.item(), input.size(0)) + top5.update(prec5.item(), input.size(0)) + + # measure elapsed time + batch_time.update(time.time() - end) + end = time.time() + + print(f" * Prec@1 {top1.avg:.3f} ({100-top1.avg:.3f}) Prec@5 {top5.avg:.3f} ({100.-top5.avg:.3f})") + + +def accuracy_np(output, target): + max_indices = np.argsort(output, axis=1)[:, ::-1] + top5 = 100 * np.equal(max_indices[:, :5], target[:, np.newaxis]).sum(axis=1).mean() + top1 = 100 * np.equal(max_indices[:, 0], target).mean() + return top1, top5 + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/tests/ryzenai/testing_utils.py 
b/tests/ryzenai/testing_utils.py index ace1b183..b1965a2a 100644 --- a/tests/ryzenai/testing_utils.py +++ b/tests/ryzenai/testing_utils.py @@ -57,7 +57,7 @@ def run_model( def prepare_outputs(self, model_id, model_class, ort_input, cache_dir=None, cache_key=None, file_name=None): set_seed(SEED) - output_ipu = self.run_model( + output_npu = self.run_model( model_class, model_id, ort_input, @@ -77,7 +77,7 @@ def prepare_outputs(self, model_id, model_class, ort_input, cache_dir=None, cach file_name=file_name, ) - return output_ipu, output_cpu + return output_npu, output_cpu def get_ops(self, cache_dir, cache_key): result = parse_json(os.path.join(cache_dir, cache_key, "vitisai_ep_report.json")) diff --git a/tests/ryzenai/vaip_config.json b/tests/ryzenai/vaip_config.json index fbca88f0..79f2ce33 100644 --- a/tests/ryzenai/vaip_config.json +++ b/tests/ryzenai/vaip_config.json @@ -1,306 +1,215 @@ { - "passes": [ - { - "name": "init", - "plugin": "vaip-pass_init" - }, - { - "name": "fuse_resize_norm", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_resize_norm", - "methodName": "rules" - } - }, - { - "name": "fuse_softmax", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_softmax", - "methodName": "rules" - } - }, - { - "name": "fuse_topk", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_topk", - "methodName": "rules" - } - }, - { - "name": "fuse_decode_filter_boxes", - "plugin": "vaip-pass_py_ext", - "disabled": false, - "pyExt": { - "moduleName": "voe.passes.fuse_decode_filter_boxes", - "methodName": "rules" - } - }, - { - "name": "fuse_NMS", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.fuse_NMS", - "methodName": "rules" - } - }, - { - "name": "fuse_DPU", - "plugin": "vaip-pass_level1_dpu", - "passDpuParam": { - "subPass": [ - { - "_comment" : "# issue 1048", - "name": 
"convert_ending_blacklist_ops_to_unknown_op", - "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", - "disabled": false - }, - { - "_comment" : "test case : yolov5s6", - "name": "manual_partition", - "plugin": "vaip-pass_manual_partition", - "disabled": true, - "manualPartition": { - "fromOps": [ - "1745/duplicated_token_14", - "1764/duplicated_token_10", - "1783/duplicated_token_6", - "1802/duplicated_token_2" - ], - "toOps": [ - "2895" - ] - } - }, - { - "name": "dynamic_input_batch", - "plugin": "vaip-pass_dynamic_input_batch" - }, - { - "_comment" : "test case q_operator_resnet50", - "name": "convert_qlinear_to_qdq", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "enableGc": true, - "pyExt": { - "moduleName": "voe.passes.convert_qlinear_to_qdq", - "methodName": "rules" - } - }, - { - "name": "create_const_op", - "plugin": "vaip-pass_create_const_op" - }, - { - "name": "convert_to_xir_op", - "plugin": "vaip-pass_py_ext", - "disabled" : false, - "pyExt": { - "moduleName": "voe.passes.convert_to_xir_op", - "methodName": "rules" - } - }, - { - "name": "to_xir", - "plugin": "vaip-pass_to_xir_ops" - }, - { - "name": "remove_extra_q_dq", - "plugin": "vaip-pass_remove_extra_q_dq" - }, - { - "name": "merge_add_into_conv_bias", - "plugin": "vaip-pass_merge_add_into_conv_bias" - }, - { - "name": "merge_fix", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "pyExt": { - "moduleName": "voe.passes.merge_fix", - "methodName": "rules" - } - }, - { - "name": "layoutransform", - "plugin": "vaip-pass_layout_transform_via_adding_transpose" - }, - { - "name": "gc_after_layout_transform", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "fuse_transpose", - "plugin": "vaip-pass_fuse_transpose", - "enableGc": true - }, - { - "name": "gc_after_fuse_transpose", - "plugin": "vaip-pass_remove_isolated_node" - }, - { - "name": "remove_identity", - "plugin": "vaip-pass_remove_identity", - "logVerbosity": 1 - }, - { - "name": "add_fix_after_const", - 
"plugin": "vaip-pass_const_add_fix" - }, - { - "_comment" : "test case 41 see issue #611 #626 for more detail", - "name": "merge_duplicated_fix", - "plugin": "vaip-pass_merge_duplicated_fix", - "disabled": true, - "enableGc": true - }, - { - "_comment": "test case 112", - "name": "remove_reshape_fix", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.remove_reshape_fix", - "methodName": "rules" - } - }, - { - "_comment" : "test case 5", - "name": "const_fold_batchnorm_to_scale", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.const_fold_batchnorm_to_scale", - "methodName": "rules" - } - }, - { - "name": "const_fold_transpose", - "plugin": "vaip-pass_const_fold_transpose" - }, - { - "name": "merge_pad", - "plugin": "vaip-pass_merge_pad" - }, - { - "name": "merge_hard_sigmoid", - "plugin": "vaip-pass_merge_hard_sigmoid" - }, - { - "_comment" : "test case 112", - "name": "merge_mul", - "plugin": "vaip-pass_py_ext", - "pyExt": { - "moduleName": "voe.passes.merge_mul", - "methodName": "rules" - } - }, - { - "name": "merge_consecutive_fix", - "plugin": "vaip-pass_merge_consecutive_fix", - "disabled": true, - "enableLog": true, - "logVerbosity": 1 - }, - { - "name": "graph_output_add_node", - "plugin": "vaip-pass_graph_output_add_node", - "disabled": true - }, - { - "_comment" : "test case 20", - "name": "convert_transpose_add_fix_input_fix_input", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_transpose_add_fix_input_fix_input", - "methodName": "process" - } - }, - { - "_comment" : "test case 100", - "name": "convert_transpose_fix_pad_fix_input", - "plugin": "vaip-pass_py_ext", - "disabled": true, - "pyExt": { - "moduleName": "voe.passes.convert_transpose_fix_pad_fix_input", - "methodName": "process" - } - }, - { - "_comment" : "test case 100", - "name": "convert_transpose_fix_input", - "plugin": "vaip-pass_py_ext", - "enableGc": true, - "disabled": true, - "pyExt": { - 
"moduleName": "voe.passes.convert_transpose_fix_input", - "methodName": "process" - } - }, - { - "_comment": "test case 110", - "name": "convert_softmax_to_hard_softmax", - "plugin": "vaip-pass_py_ext", - "disabled" : true, - "pyExt": { - "moduleName": "voe.passes.convert_softmax_to_hard_softmax", - "methodName": "rules" - } - }, - { - "_comment": "test case 43", - "name": "remove_top_transpose", - "plugin": "vaip-pass_merge_input_transpose", - "disabled": true, - "enableGc": true - }, - { - "_comment": "test case 110", - "name": "remove_bottom_transpose", - "plugin": "vaip-pass_remove_bottom_transpose", - "disabled": true, - "enableGc": true + "passes": [ + { + "name": "init", + "plugin": "vaip-pass_init" }, - { - "name": "final_gc", - "plugin": "vaip-pass_remove_isolated_node" - } - ], - "xcompilerAttrs": { - "debug_mode" : { - "stringValue" : "performance" - }, - "dpu_subgraph_num" : { - "intValue" : 32 - }, - "opt_level" : { - "intValue" : 0 - }, - "dump_subgraph_ops" : { - "boolValue" : false - }, - "profile" : { - "intValue" : 0 - }, - "prefetch" : { - "boolValue" : false - }, - "preassign" : { - "boolValue" : false - }, - "disable_std_quant" : { - "boolValue" : false - }, - "concat_skip_code_gen" : { - "boolValue" : false + { + "name": "fuse_resize_norm", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_resize_norm", + "methodName": "rules" } - }, - "minimum_num_of_conv": 2 - } + }, + { + "name": "fuse_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_softmax", + "methodName": "rules" + } + }, + { + "name": "fuse_topk", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_topk", + "methodName": "rules" + } + }, + { + "name": "fuse_decode_filter_boxes", + "plugin": "vaip-pass_py_ext", + "disabled": false, + "pyExt": { + "moduleName": "voe.passes.fuse_decode_filter_boxes", + "methodName": "rules" + } + }, + 
{ + "name": "fuse_NMS", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.fuse_NMS", + "methodName": "rules" + } + }, + { + "name": "fuse_DPU", + "plugin": "vaip-pass_level1_dpu", + "passDpuParam": { + "subPass": [ + { + "name": "convert_ending_blacklist_ops_to_unknown_op", + "plugin": "vaip-pass_convert_ending_blacklist_ops_to_unknown_op", + "disabled": false + }, + { + "name": "dynamic_input_batch", + "plugin": "vaip-pass_dynamic_input_batch" + }, + { + "name": "create_const_op", + "plugin": "vaip-pass_create_const_op" + }, + { + "name": "convert_to_xir_op", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.convert_to_xir_op", + "methodName": "rules" + } + }, + { + "name": "to_xir", + "plugin": "vaip-pass_to_xir_ops" + }, + { + "name": "remove_extra_q_dq", + "plugin": "vaip-pass_remove_extra_q_dq" + }, + { + "name": "merge_add_into_conv_bias", + "plugin": "vaip-pass_merge_add_into_conv_bias" + }, + { + "name": "merge_fix", + "plugin": "vaip-pass_merge_fix", + "enableGc": true + }, + { + "name": "layoutransform", + "plugin": "vaip-pass_layout_transform_via_adding_transpose" + }, + { + "name": "gc_after_layout_transform", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "fuse_transpose", + "plugin": "vaip-pass_fuse_transpose", + "enableGc": true + }, + { + "name": "gc_after_fuse_transpose", + "plugin": "vaip-pass_remove_isolated_node" + }, + { + "name": "remove_identity", + "plugin": "vaip-pass_remove_identity", + "logVerbosity": 1 + }, + { + "name": "add_fix_after_const", + "plugin": "vaip-pass_const_add_fix" + }, + { + "name": "remove_reshape_fix", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.remove_reshape_fix", + "methodName": "rules" + } + }, + { + "name": "const_fold_batchnorm_to_scale", + "plugin": "vaip-pass_py_ext", + "pyExt": { + "moduleName": "voe.passes.const_fold_batchnorm_to_scale", + "methodName": "rules" + } + }, + { + "name": 
"const_fold_transpose", + "plugin": "vaip-pass_const_fold_transpose" + }, + { + "name": "merge_pad", + "plugin": "vaip-pass_merge_pad" + }, + { + "name": "merge_hard_sigmoid", + "plugin": "vaip-pass_merge_hard_sigmoid" + }, + { + "name": "merge_mul", + "plugin": "vaip-pass_py_ext", + "enableGc": true, + "pyExt": { + "moduleName": "voe.passes.merge_mul", + "methodName": "rules" + } + }, + { + "name": "merge_consecutive_fix", + "plugin": "vaip-pass_merge_consecutive_fix", + "disabled": true, + "enableLog": true, + "logVerbosity": 1 + }, + { + "_comment": "test case 110", + "name": "convert_softmax_to_hard_softmax", + "plugin": "vaip-pass_py_ext", + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.convert_softmax_to_hard_softmax", + "methodName": "rules" + } + }, + { + "name": "merge_fix_fix_transpose", + "plugin": "vaip-pass_py_ext", + "enableGc": true, + "disabled": true, + "pyExt": { + "moduleName": "voe.passes.merge_fix_fix_transpose", + "methodName": "rules" + } + }, + { + "name": "final_gc", + "plugin": "vaip-pass_remove_isolated_node" + } + ], + "xcompilerAttrs": { + "debug_mode": { + "stringValue": "performance" + }, + "dpu_subgraph_num": { + "uintValue": 32 + }, + "opt_level": { + "uintValue": 0 + }, + "dump_subgraph_ops": { + "boolValue": false + }, + "profile": { + "uintValue": 0 + }, + "disable_std_quant": { + "boolValue": false + } + }, + "minimum_num_of_conv": 2 + } + } + ] } - ] -} + \ No newline at end of file diff --git a/utils/ryzenai/README.md b/utils/ryzenai/README.md index 2d1b75ac..912dcbc3 100644 --- a/utils/ryzenai/README.md +++ b/utils/ryzenai/README.md @@ -1,7 +1,7 @@ # Utilities -## Generate IPU Baseline Operators +## Generate NPU Baseline Operators * Setup the Ryzen testing environment.