diff --git a/.github/workflows/test_ryzenai_nightly.yaml b/.github/workflows/test_ryzenai_nightly.yaml
index f573d1a1..cf1eb81d 100644
--- a/.github/workflows/test_ryzenai_nightly.yaml
+++ b/.github/workflows/test_ryzenai_nightly.yaml
@@ -26,7 +26,7 @@ jobs:
       slow_test: true
       timeout_minutes: 1200
     secrets:
-      hf_hub_read_token: ${{ secrets.HF_READ_TOKEN }}
+      hf_hub_read_token: ${{ secrets.HF_HUB_READ_TOKEN }}
   send_results:
     name: Send results to webhook
     runs-on: ubuntu-22.04
diff --git a/.github/workflows/test_ryzenai_quantization_timm.yaml b/.github/workflows/test_ryzenai_quantization_timm.yaml
index bc313660..d8900331 100644
--- a/.github/workflows/test_ryzenai_quantization_timm.yaml
+++ b/.github/workflows/test_ryzenai_quantization_timm.yaml
@@ -27,4 +27,4 @@ jobs:
     with:
       test_file: "tests/ryzenai/test_quantization.py"
     secrets:
-      hf_hub_read_token: ${{ secrets.HF_READ_TOKEN }}
+      hf_hub_read_token: ${{ secrets.HF_HUB_READ_TOKEN }}
diff --git a/docs/source/ryzenai/package_reference/quantization.mdx b/docs/source/ryzenai/package_reference/quantization.mdx
index 8dcd8b45..fcda9e88 100644
--- a/docs/source/ryzenai/package_reference/quantization.mdx
+++ b/docs/source/ryzenai/package_reference/quantization.mdx
@@ -15,3 +15,21 @@ Please refer to the guide [How to apply quantization](https://huggingface.co/doc
 ### QuantizationConfig
 
 [[autodoc]] ryzenai.QuantizationConfig
+
+### CalibrationMethod
+
+[[autodoc]] ryzenai.CalibrationMethod
+
+### QuantFormat
+
+[[autodoc]] ryzenai.QuantFormat
+
+### QuantType
+
+[[autodoc]] ryzenai.QuantType
+
+### ExtraOptions
+
+[[autodoc]] ryzenai.ExtraOptions
+
+
diff --git a/optimum/amd/ryzenai/__init__.py b/optimum/amd/ryzenai/__init__.py
index 59569806..9e9f6966 100644
--- a/optimum/amd/ryzenai/__init__.py
+++ b/optimum/amd/ryzenai/__init__.py
@@ -7,7 +7,16 @@
 
 
 _import_structure = {
-    "configuration": ["RyzenAIConfig", "QuantizationConfig", "AutoQuantizationConfig"],
+    "configuration": [
+        "AutoQuantizationConfig",
+        "CalibrationMethod",
+        "ExtraOptions",
+        "QuantFormat",
+        "QuantizationConfig",
+        "QuantType",
+        "AutoQuantizationConfig",
+        "RyzenAIConfig",
+    ],
     "modeling": [
         "RyzenAIModel",
         "RyzenAIModelForCustomTasks",
@@ -24,7 +33,15 @@
 
 # Direct imports for type-checking
 if TYPE_CHECKING:
-    from .configuration import AutoQuantizationConfig, QuantizationConfig, RyzenAIConfig
+    from .configuration import (
+        AutoQuantizationConfig,
+        CalibrationMethod,
+        ExtraOptions,
+        QuantFormat,
+        QuantizationConfig,
+        QuantType,
+        RyzenAIConfig,
+    )
     from .modeling import (
         RyzenAIModel,
         RyzenAIModelForCustomTasks,
diff --git a/optimum/amd/ryzenai/configuration.py b/optimum/amd/ryzenai/configuration.py
index c043d473..51efca39 100644
--- a/optimum/amd/ryzenai/configuration.py
+++ b/optimum/amd/ryzenai/configuration.py
@@ -2,16 +2,266 @@
 # Licensed under the MIT License.
 """Configuration classes for quantization with RyzenAI."""
 
-from dataclasses import asdict, dataclass
+import re
+from dataclasses import asdict, dataclass, field, fields
 from enum import Enum
-from typing import Optional
+from typing import Dict, List, Literal, Optional, Tuple, Union
 
 import vai_q_onnx
-from onnxruntime.quantization import CalibrationMethod, QuantFormat, QuantType
 
 from optimum.configuration_utils import BaseConfig
 
 
+QUANT_TYPE_MAPPING = {
+    "uint8": vai_q_onnx.QuantType.QUInt8,
+    "int8": vai_q_onnx.QuantType.QInt8,
+    "uint16": vai_q_onnx.VitisQuantType.QUInt16,
+    "int16": vai_q_onnx.VitisQuantType.QInt16,
+    "uint32": vai_q_onnx.VitisQuantType.QUInt32,
+    "int32": vai_q_onnx.VitisQuantType.QInt32,
+    "float16": vai_q_onnx.VitisQuantType.QFloat16,
+    "bfloat16": vai_q_onnx.VitisQuantType.QBFloat16,
+}
+
+QUANT_FORMAT_MAPPING = {
+    "qop": vai_q_onnx.QuantFormat.QOperator,
+    "qdq": vai_q_onnx.QuantFormat.QDQ,
+    "vitisqdq": vai_q_onnx.VitisQuantFormat.QDQ,
+    "vitisfixneuron": vai_q_onnx.VitisQuantFormat.FixNeuron,
+}
+
+CALIBRATION_METHOD_MAPPING = {
+    "minmax": vai_q_onnx.CalibrationMethod.MinMax,
+    "entropy": vai_q_onnx.CalibrationMethod.Entropy,
+    "percentile": vai_q_onnx.CalibrationMethod.Percentile,
+    "nonoverflow": vai_q_onnx.PowerOfTwoMethod.NonOverflow,
+    "mse": vai_q_onnx.PowerOfTwoMethod.MinMSE,
+}
+
+
+class CalibrationMethod(Enum):
+    """CalibrationMethod is an enumeration of the calibration methods supported by RyzenAI quantization."""
+
+    MinMax = vai_q_onnx.CalibrationMethod.MinMax
+    Entropy = vai_q_onnx.CalibrationMethod.Entropy
+    Percentile = vai_q_onnx.CalibrationMethod.Percentile
+    NonOverflow = vai_q_onnx.PowerOfTwoMethod.NonOverflow
+    MinMSE = vai_q_onnx.PowerOfTwoMethod.MinMSE
+
+
+@dataclass
+class ExtraOptions:
+    """
+    ExtraOptions is a dataclass handling additional options for quantization.
+
+    Args:
+        activation_symmetric (`bool`, defaults to `False`):
+            If True, symmetrize calibration data for activations.
+        weight_symmetric (`bool`, defaults to `True`):
+            If True, symmetrize calibration data for weights.
+        use_unsigned_relu (`bool`, defaults to `False`):
+            If True, the output tensor of ReLU and Clip, whose min is 0, will be forced to be asymmetric.
+        quantize_bias (`bool`, defaults to `True`):
+            If True, quantize the Bias as normal weights.
+        remove_input_init (`bool`, defaults to `True`):
+            If True, initializer in graph inputs will be removed because it will not be treated as a constant value/weight.
+            This may prevent some of the graph optimizations, like const folding.
+        enable_subgraph (`bool`, defaults to `False`):
+            If True, the subgraph will be quantized. More support for this feature is planned in the future.
+        force_quantize_no_input_check (`bool`, defaults to `False`):
+            If True, latent operators such as maxpool and transpose will always quantize their inputs, generating quantized
+            outputs even if their inputs have not been quantized.
+        matmul_const_b_only (`bool`, defaults to `False`):
+            If True, only MatMul operations with a constant 'B' will be quantized.
+        add_qdq_pair_to_weight (`bool`, defaults to `False`):
+            If True, both QuantizeLinear and DeQuantizeLinear nodes are inserted for weight, maintaining its floating-point format.
+            In the PowerOfTwoMethod calibration method, this setting will also be effective for the bias.
+        op_types_to_exclude_output_quantization (`List[str] or None`, defaults to `[]`):
+            If specified, the output of operators with these types will not be quantized.
+        dedicated_qdq_pair (`bool`, defaults to `False`):
+            If True, an identical and dedicated QDQ pair is created for each node. If False (the default), multiple nodes
+            can share a single QDQ pair as their inputs.
+        qdq_op_type_per_channel_support_to_axis (`Dict`, defaults to `{}`):
+            Sets the channel axis for specific operator types (e.g., {'MatMul': 1}).
+        use_qdq_vitis_custom_ops (`bool`, defaults to `True`):
+            If True, The UInt8 and Int8 quantization will be executed by the custom operations library, otherwise by the library
+            of onnxruntime extensions. Only valid in vai_q_onnx.VitisQuantFormat.QDQ.
+        calib_tensor_range_symmetric (`bool`, defaults to `False`):
+            If True, the final range of the tensor during calibration will be symmetrically set around the central point "0".
+            In PowerOfTwoMethod calibration method, the default is True.
+        calib_moving_average (`bool`, defaults to `False`):
+            If True, the moving average of the minimum and maximum values will be computed when the calibration method selected is
+            MinMax. In PowerOfTwoMethod calibration method, this should be set to False.
+        calib_moving_average_constant (`float`, defaults to `0.01`):
+            Specifies the constant smoothing factor to use when computing the moving average of the minimum and maximum values.
+            Only effective when the calibration method selected is MinMax and CalibMovingAverage is set to True.
+            In PowerOfTwoMethod calibration method, this option is unsupported.
+        random_data_reader_input_data_range (`Dict or None`, defaults to `None`):
+            Specifies the data range for each input if used random data reader (calibration_data_reader is None).
+        int16_scale (`bool`, defaults to `False`):
+            If True, the float scale will be replaced by the closest value corresponding to M and 2**N, where the range of M and 2**N
+            is within the representation range of int16 and uint16.
+        min_mse_mode (`str`, defaults to `'All'`):
+            When using vai_q_onnx.PowerOfTwoMethod.MinMSE, you can specify the method for calculating minmse.
+            By default, minmse is calculated using all calibration data. Alternatively, you can set the mode to "MostCommon",
+            where minmse is calculated for each batch separately and take the most common value.
+        convert_bn_to_conv (`bool`, defaults to `True`):
+            If True, the BatchNormalization operation will be converted to Conv operation when enable_ipu_cnn is True.
+        convert_reduce_mean_to_global_avg_pool (`bool`, defaults to `True`):
+            If True, the Reduce Mean operation will be converted to Global Average Pooling operation when enable_ipu_cnn is True.
+        split_large_kernel_pool (`bool`, defaults to `True`):
+            If True, the large kernel Global Average Pooling operation will be split into multiple Average Pooling operation when
+            enable_ipu_cnn is True.
+        convert_split_to_slice (`bool`, defaults to `True`):
+            If True, the Split operation will be converted to Slice operation when enable_ipu_cnn is True.
+        fuse_instance_norm (`bool`, defaults to `False`):
+            If True, the split instance norm operation will be fused to InstanceNorm operation when enable_ipu_cnn is True.
+        fuse_l2_norm (`bool`, defaults to `False`):
+            If True, a set of L2norm operations will be fused to L2Norm operation when enable_ipu_cnn is True.
+        convert_clip_to_relu (`bool`, defaults to `False`):
+            If True, the Clip operations that have a min value of 0 will be converted to ReLU operations.
+        simulate_dpu (`bool`, defaults to `True`):
+            If True, a simulation transformation that replaces some operations with an approximate implementation will be applied
+            for DPU when enable_ipu_cnn is True.
+        convert_leaky_relu_to_dpu_version (`bool`, defaults to `True`):
+            If True, the Leaky Relu operation will be converted to DPU version when SimulateDPU is True.
+        convert_sigmoid_to_hard_sigmoid (`bool`, defaults to `True`):
+            If True, the Sigmoid operation will be converted to Hard Sigmoid operation when SimulateDPU is True.
+        convert_hard_sigmoid_to_dpu_version (`bool`, defaults to `True`):
+            If True, the Hard Sigmoid operation will be converted to DPU version when SimulateDPU is True.
+        convert_avg_pool_to_dpu_version (`bool`, defaults to `True`):
+            If True, the global or kernel-based Average Pooling operation will be converted to DPU version when SimulateDPU is True.
+        convert_reduce_mean_to_dpu_version (`bool`, defaults to `True`):
+            If True, the ReduceMean operation will be converted to DPU version when SimulateDPU is True.
+        convert_softmax_to_dpu_version (`bool`, defaults to `False`):
+            If True, the Softmax operation will be converted to DPU version when SimulateDPU is True.
+        ipu_limitation_check (`bool`, defaults to `True`):
+            If True, the quantization scale will be adjusted due to the limitation of DPU/NPU.
+        adjust_shift_cut (`bool`, defaults to `True`):
+            If True, adjust the shift cut of nodes when ipu_limitation_check is True.
+        adjust_shift_bias (`bool`, defaults to `True`):
+            If True, adjust the shift bias of nodes when ipu_limitation_check is True.
+        adjust_shift_read (`bool`, defaults to `True`):
+            If True, adjust the shift read of nodes when ipu_limitation_check is True.
+        adjust_shift_write (`bool`, defaults to `True`):
+            If True, adjust the shift write of nodes when ipu_limitation_check is True.
+        adjust_hard_sigmoid (`bool`, defaults to `True`):
+            If True, adjust the pos of hard sigmoid nodes when ipu_limitation_check is True.
+        adjust_shift_swish (`bool`, defaults to `True`):
+            If True, adjust the shift swish when ipu_limitation_check is True.
+        align_concat (`bool`, defaults to `True`):
+            If True, adjust the quantization pos of concat when ipu_limitation_check is True.
+        align_pool (`bool`, defaults to `True`):
+            If True, adjust the quantization pos of pooling when ipu_limitation_check is True.
+        replace_clip6_relu (`bool`, defaults to `False`):
+            If True, replace Clip(0,6) with Relu in the model.
+        cle_steps (`int`, defaults to `1`):
+            Specifies the steps for CrossLayerEqualization execution when include_cle is set to true. When set to -1,
+            an adaptive CrossLayerEqualization will be conducted.
+        cle_total_layer_diff_threshold (`float`, defaults to `2e-7`):
+            Specifies The threshold represents the sum of mean transformations of CrossLayerEqualization transformations across
+            all layers when utilizing CrossLayerEqualization.
+        cle_scale_append_bias (`bool`, defaults to `True`):
+            Whether the bias be included when calculating the scale of the weights.
+        remove_qdq_conv_leaky_relu (`bool`, defaults to `False`):
+            If True, the QDQ between Conv and LeakyRelu will be removed for DPU when enable_ipu_cnn is True.
+        remove_qdq_conv_prelu (`bool`, defaults to `False`):
+            If True, the QDQ between Conv and PRelu will be removed for DPU when enable_ipu_cnn is True.
+    """
+
+    activation_symmetric: bool = False
+    weight_symmetric: bool = True
+    use_unsigned_relu: bool = False
+    quantize_bias: bool = True
+    remove_input_init: bool = True
+    enable_subgraph: bool = False
+    force_quantize_no_input_check: bool = False
+    matmul_const_b_only: bool = False
+    add_qdq_pair_to_weight: bool = False
+    op_types_to_exclude_output_quantization: Union[List[str], None] = field(default_factory=list)
+    dedicated_qdq_pair: bool = False
+    qdq_op_type_per_channel_support_to_axis: Dict = field(default_factory=dict)
+    use_qdq_vitis_custom_ops: bool = True
+    calib_tensor_range_symmetric: bool = False
+    calib_moving_average: bool = False
+    calib_moving_average_constant: float = 0.01
+    random_data_reader_input_data_range: Union[Dict, None] = None
+    int16_scale: bool = False
+    min_mse_mode: str = "All"
+    convert_bn_to_conv: bool = True
+    convert_reduce_mean_to_global_avg_pool: bool = True
+    split_large_kernel_pool: bool = True
+    convert_split_to_slice: bool = True
+    fuse_instance_norm: bool = False
+    fuse_l2_norm: bool = False
+    convert_clip_to_relu: bool = False
+    simulate_dpu: bool = True
+    convert_leaky_relu_to_dpu_version: bool = True
+    convert_sigmoid_to_hard_sigmoid: bool = True
+    convert_hard_sigmoid_to_dpu_version: bool = True
+    convert_avg_pool_to_dpu_version: bool = True
+    convert_reduce_mean_to_dpu_version: bool = True
+    convert_softmax_to_dpu_version: bool = False
+    ipu_limitation_check: bool = True
+    adjust_shift_cut: bool = True
+    adjust_shift_bias: bool = True
+    adjust_shift_read: bool = True
+    adjust_shift_write: bool = True
+    adjust_hard_sigmoid: bool = True
+    adjust_shift_swish: bool = True
+    align_concat: bool = True
+    align_pool: bool = True
+    replace_clip6_relu: bool = False
+    cle_steps: int = 1
+    cle_total_layer_diff_threshold: float = 2e-7
+    cle_scale_append_bias: bool = True
+    remove_qdq_conv_leaky_relu: bool = False
+    remove_qdq_conv_prelu: bool = False
+
+    @property
+    def snake_to_camel(self):
+        return {
+            "qdq_op_type_per_channel_support_to_axis": "QDQOpTypePerChannelSupportToAxis",
+            "ipu_limitation_check": "IPULimitationCheck",
+            "cle_steps": "CLESteps",
+            "cle_total_layer_diff_threshold": "CLETotalLayerDiffThreshold",
+            "cle_scale_append_bias": "CLEScaleAppendBias",
+        }
+
+    @property
+    def camel_to_snake(self):
+        return {value: key for key, value in self.snake_to_camel.items()}
+
+    def __setattr__(self, name, value):
+        snake_case_name = self.camel_to_snake.get(name, re.sub(r"([A-Z])", r"_\1", name).lower().lstrip("_"))
+
+        super().__setattr__(snake_case_name, value)
+
+    def __getattr__(self, name):  # fallback only: Python calls this after normal lookup fails (e.g. CamelCase aliases)
+        snake_case_name = self.camel_to_snake.get(name, re.sub(r"([A-Z])", r"_\1", name).lower().lstrip("_"))
+        return object.__getattribute__(self, snake_case_name)  # raises AttributeError on a miss instead of recursing
+
+    def to_diff_dict(self, camel_case=False) -> dict:
+        """
+        Returns a dictionary of non-default values in the configuration.
+        """
+        non_default_values = {}
+        for option in fields(self):
+            if camel_case:
+                name = self.snake_to_camel.get(
+                    option.name, "".join(word.capitalize() for word in option.name.split("_"))
+                )
+            else:
+                name = option.name
+            if (
+                getattr(self, option.name) != option.default
+                and getattr(self, option.name) != {}
+                and getattr(self, option.name) != []
+            ):
+                non_default_values[name] = getattr(self, option.name)
+        return non_default_values
+
+
 @dataclass
 class QuantizationConfig:
     """
@@ -20,82 +270,271 @@ class QuantizationConfig:
     Args:
         is_static (`bool`):
             Whether to apply static quantization or dynamic quantization.
-        format (`QuantFormat`):
-            Targeted RyzenAI quantization representation format.
-            For the Operator Oriented (QOperator) format, all the quantized operators have their own ONNX definitions.
-            For the Tensor Oriented (QDQ) format, the model is quantized by inserting QuantizeLinear / DeQuantizeLinear
-            operators.
-        calibration_method (`CalibrationMethod`):
-            The method chosen to calculate the activations quantization parameters using the calibration dataset.
-        activations_dtype (`QuantType`, defaults to `QuantType.QUInt8`):
-            The quantization data types to use for the activations.
-        activations_symmetric (`bool`, defaults to `False`):
-            Whether to apply symmetric quantization on the activations.
-        weights_dtype (`QuantType`, defaults to `QuantType.QInt8`):
-            The quantization data types to use for the weights.
-        weights_symmetric (`bool`, defaults to `True`):
-            Whether to apply symmetric quantization on the weights.
-        enable_dpu (`bool`, defaults to `True`):
-            Determines whether to generate a quantized model that is suitable for the DPU. If set to True, the quantization
-            process will create a model that is optimized for DPU computations.
-
+        format (`Union[QuantFormat, str]`, defaults to `"qdq"`):
+            This parameter is used to specify the quantization format of the model.
+            Options:
+            - `"qop"` (`QuantFormat.QOperator`): Quantizes the model directly using quantized operators.
+            - `"qdq"` (`QuantFormat.QDQ`): Quantizes the model by inserting QuantizeLinear/DeQuantizeLinear into the tensor.
+              Supports 8-bit quantization only.
+            - `"vitisqdq"` (`QuantFormat.VitisQuantFormat`): Quantizes the model by inserting VitisQuantizeLinear/VitisDequantizeLinear
+              into the tensor. Supports a wider range of bit-widths and precisions.
+            - `"vitisfixneuron"` (`QuantFormat.FixNeuron`, experimental): Quantizes the model by inserting FixNeuron (a combination of
+              QuantizeLinear and DeQuantizeLinear) into the tensor. Experimental and not recommended for deployment.
+        calibration_method (`Union[CalibrationMethod, str]`, defaults to `"mse"`):
+            The method used in calibration.
+            Options (use power-of-two methods for CNNs running on NPU, and float scale methods for Transformers on NPU or CNNs on CPU):
+            - `"nonoverflow"` (`CalibrationMethod.NonOverflow`): Power-of-two method to prevent min/max values from overflowing.
+            - `"mse"` (`CalibrationMethod.MinMSE`): Power-of-two method to minimize the mean squared error between quantized and float values.
+              Longer calibration time but usually better accuracy.
+            - `"minmax"` (`CalibrationMethod.MinMax`): Obtain quantization parameters based on minimum and maximum values of each tensor.
+            - `"entropy"` (`CalibrationMethod.Entropy`): Determine quantization parameters based on the entropy algorithm of each tensor's distribution.
+            - `"percentile"` (`CalibrationMethod.Percentile`): Calculate quantization parameters using percentiles of tensor values.
+        enable_ipu_cnn (`bool`, defaults to `False`):
+            Flag to generate a quantized model suitable for DPU/NPU computations. If True, the quantization process will
+            consider specific limitations and requirements of DPU/NPU, optimizing the model accordingly.
+        input_nodes (`List[str]`, defaults to an empty list `[]`):
+            List of names of starting nodes to be quantized. Nodes before these nodes will not be quantized.
+        output_nodes (`List[str]`, defaults to an empty list `[]`):
+            List of names of end nodes to be quantized. Nodes after these nodes will not be quantized.
+        op_types_to_quantize (`List[str]`, defaults to an empty list `[]`):
+            If specified, only operators of the given types will be quantized (e.g., ['Conv'] to quantize Convolutional layers).
+        random_data_reader_input_shape (`Union[List[int], Tuple[int], Dict[str, List[int]]]`, defaults to an empty list `[]`):
+            Shapes of input nodes for internal random data reader. If dynamic axes require specific values, provide shapes.
+            Format: list/tuple for single input, list of lists for multiple inputs, or dict {name: shape} for named inputs.
+        per_channel (`bool`, defaults to `False`):
+            Determines whether weights should be quantized per channel. Must be False for DPU/NPU devices.
+        reduce_range (`bool`, defaults to `False`):
+            If True, quantizes weights with 7-bits. Must be False for DPU/NPU devices.
+        activations_dtype (`Union[QuantType, str]`, defaults to `"uint8"`):
+            Specifies the quantization data type for activations.
+        weights_dtype (`Union[QuantType, str]`, defaults to `"int8"`):
+            Specifies the quantization data type for weights. Must be `"int8"` (`QuantType.QInt8`) for NPU devices.
+        nodes_to_quantize (`List[str]`, defaults to an empty list `[]`):
+            If specified, only the nodes in this list are quantized.
+        nodes_to_exclude (`List[str]`, defaults to an empty list `[]`):
+            If specified, nodes in this list will be excluded from quantization.
+        optimize_model (`bool`, defaults to `True`):
+            If True, optimizes the model before quantization.
+        use_external_data_format (`bool`, defaults to `False`):
+            Flag for large size (>2GB) models. If True, model proto and data will be stored in separate files.
+        execution_providers (`List[str]`, defaults to `['CPUExecutionProvider']`):
+            Defines the execution providers used by ONNX Runtime for model calibration.
+        convert_fp16_to_fp32 (`bool`, defaults to `False`):
+            Controls whether to convert the input model from float16 to float32 before quantization.
+        convert_nchw_to_nhwc (`bool`, defaults to `False`):
+            Controls whether to convert the input NCHW model to NHWC model before quantization.
+        include_cle (`bool`, defaults to `False`):
+            Flag to optimize models using CrossLayerEqualization; can improve accuracy for some models.
+        extra_options (`Union[Dict, None, ExtraOptions]`, defaults to an instance of `ExtraOptions` with default values):
+            Contains key-value pairs for various options in different cases.
     """
 
-    format: QuantFormat = QuantFormat.QDQ
-    calibration_method: CalibrationMethod = vai_q_onnx.PowerOfTwoMethod.MinMSE
-    activations_dtype: QuantType = QuantType.QUInt8
-    activations_symmetric: bool = True
-    weights_dtype: QuantType = QuantType.QInt8
-    weights_symmetric: bool = True
-    enable_dpu: bool = True
+    format: Literal["qdq", "qop", "vitisqdq"] = "qdq"
+    calibration_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "mse"
+    input_nodes: List[str] = field(default_factory=list)
+    output_nodes: List[str] = field(default_factory=list)
+    op_types_to_quantize: List[str] = field(default_factory=list)
+    random_data_reader_input_shape: Union[List[int], Tuple[int], Dict[str, List[int]]] = field(default_factory=list)
+    per_channel: bool = False
+    reduce_range: bool = False
+    activations_dtype: Literal["uint8", "int8", "uint16", "int16", "uint32", "int32", "bfloat16", "float16"] = "uint8"
+    weights_dtype: Literal["uint8", "int8", "uint16", "int16", "uint32", "int32", "bfloat16", "float16"] = "int8"
+    nodes_to_quantize: List[str] = field(default_factory=list)
+    nodes_to_exclude: List[str] = field(default_factory=list)
+    optimize_model: bool = True
+    use_external_data_format: bool = False
+    execution_providers: List[str] = field(default_factory=lambda: ["CPUExecutionProvider"])
+    enable_ipu_cnn: bool = False
+    convert_fp16_to_fp32: bool = False
+    convert_nchw_to_nhwc: bool = False
+    include_cle: bool = False
+    extra_options: ExtraOptions = field(default_factory=ExtraOptions)
 
-    @staticmethod
-    def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str:
-        return (
-            f"{'s8' if activations_dtype == QuantType.QInt8 else 'u8'}"
-            f"/"
-            f"{'s8' if weights_dtype == QuantType.QInt8 else 'u8'}"
-        )
+    def __post_init__(self):
+        if isinstance(self.extra_options, dict):
+            self.extra_options = ExtraOptions(**self.extra_options)
+
+        if self.calibration_method in {vai_q_onnx.PowerOfTwoMethod.NonOverflow, vai_q_onnx.PowerOfTwoMethod.MinMSE}:
+            self.extra_options.calib_tensor_range_symmetric = True
+
+        if (
+            self.activations_dtype not in {vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8}
+            and self.format != vai_q_onnx.VitisQuantFormat.QDQ
+        ):
+            raise ValueError(
+                f'activations_dtype is: "{self.activations_dtype.name.lower()}", format must be "vitisqdq".'
+            )
+        if (
+            self.weights_dtype not in {vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8}
+            and self.format != vai_q_onnx.VitisQuantFormat.QDQ
+        ):
+            raise ValueError(f'weights_dtype is: "{self.weights_dtype.name.lower()}", format must be "vitisqdq".')
+
+        if self.enable_ipu_cnn:
+            if self.format != vai_q_onnx.QuantFormat.QDQ:
+                raise ValueError(f'ipu cnn configuration only support format "qdq". Got {self.format}.')
+
+            if self.calibration_method not in {
+                vai_q_onnx.PowerOfTwoMethod.NonOverflow,
+                vai_q_onnx.PowerOfTwoMethod.MinMSE,
+            }:
+                raise ValueError(
+                    f'ipu cnn configuration only support calibration_method "nonoverflow" and "mse". Got {self.calibration_method.name.lower()}.'
+                )
+
+            if not (self.extra_options.activation_symmetric and self.extra_options.weight_symmetric):
+                raise ValueError(
+                    "ipu cnn configuration requires setting activation_symmetric and weight_symmetric to true."
+                )
+
+            if self.weights_dtype != vai_q_onnx.QuantType.QInt8:
+                raise ValueError(
+                    f'ipu cnn configuration only support weights_dtype "int8". Got {self.weights_dtype.name.lower()}.'
+                )
+
+            if self.per_channel:
+                raise ValueError("ipu cnn configuration only supports per tensor. Got per_channel=True.")
+
+    def __setattr__(self, name, value):
+        if name == "extra_options" and isinstance(value, dict):
+            setattr(self, "extra_options", ExtraOptions(**value))
+        else:
+            super().__setattr__(name, value)
+
+    def __getattribute__(self, name: str):
+        value = super().__getattribute__(name)
+        if isinstance(value, str):
+            if name == "format":
+                value = QUANT_FORMAT_MAPPING[value]
+            elif name == "calibration_method":
+                value = CALIBRATION_METHOD_MAPPING[value]
+            elif name == "activations_dtype":
+                value = QUANT_TYPE_MAPPING[value]
+            elif name == "weights_dtype":
+                value = QUANT_TYPE_MAPPING[value]
+
+        return value
+
+    def to_dict(self):
+        options_dict = self.__dict__.copy()
+        options_dict["extra_options"] = options_dict["extra_options"].to_diff_dict()
+        return options_dict
+
+    def to_diff_dict(self) -> dict:
+        """
+        Returns a dictionary of the options whose stored value differs from its default (enum values are given by name).
+        """
+        non_default_values = {}
+        for option in fields(self):
+            if option.name == "extra_options":
+                extra_options_dict = getattr(self, option.name).to_diff_dict()
+                if extra_options_dict:
+                    non_default_values[option.name] = extra_options_dict
+            else:
+                value = getattr(self, option.name)
+                # Compare the raw stored value: __getattribute__ maps str keys to enums, which never equal a str default.
+                if self.__dict__[option.name] != option.default and value not in ({}, []):
+                    if option.name == "execution_providers" and value == ["CPUExecutionProvider"]:
+                        continue
+
+                    if isinstance(value, Enum):
+                        value = value.name
+                    elif isinstance(value, list):
+                        value = [elem.name if isinstance(elem, Enum) else elem for elem in value]
+
+                    non_default_values[option.name] = value
+        return non_default_values
 
     @property
     def use_symmetric_calibration(self) -> bool:
-        return self.activations_symmetric and self.weights_symmetric
+        if self.extra_options:
+            return self.extra_options.activation_symmetric and self.extra_options.weight_symmetric
+
+        return ExtraOptions().activation_symmetric and ExtraOptions().weight_symmetric
 
     def __str__(self):
         return (
             f"{self.format} ("
-            f"schema: {QuantizationConfig.quantization_type_str(self.activations_dtype, self.weights_dtype)}, "
-            f"enable_dpu: {self.enable_dpu})"
+            f"schema: {self.activations_dtype.name}/{self.weights_dtype.name}, "
+            f"enable_ipu_cnn: {self.enable_ipu_cnn})"
         )
 
 
 class AutoQuantizationConfig:
     @staticmethod
-    def ipu_cnn_config():
+    def ipu_cnn_config(
+        calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "mse",
+        nodes_to_quantize: Optional[List[str]] = None,
+        nodes_to_exclude: Optional[List[str]] = None,
+        op_types_to_quantize: Optional[List[str]] = None,
+        extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None,
+    ):
+        extra_options = extra_options or {}
+        if isinstance(extra_options, dict):
+            extra_options = ExtraOptions(**extra_options)
+        # to_diff_dict() keeps only non-default options, so activation_symmetric is absent unless it is already True.
+        extra_options_dict = extra_options.to_diff_dict()
+        extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True)
+        # The list arguments are copied below so returned configs never share list objects with callers or each other.
         return QuantizationConfig(
-            format=QuantFormat.QDQ,
-            calibration_method=vai_q_onnx.PowerOfTwoMethod.MinMSE,
-            activations_dtype=QuantType.QUInt8,
-            activations_symmetric=True,
-            weights_dtype=QuantType.QInt8,
-            weights_symmetric=True,
-            enable_dpu=True,
+            format="qdq",
+            calibration_method=calibrate_method,
+            activations_dtype="uint8",
+            weights_dtype="int8",
+            enable_ipu_cnn=True,
+            op_types_to_quantize=list(op_types_to_quantize or []),
+            nodes_to_quantize=list(nodes_to_quantize or []),
+            nodes_to_exclude=list(nodes_to_exclude or []),
+            extra_options=ExtraOptions(**extra_options_dict),
+        )
+
+    @staticmethod
+    def ipu_transformer_config(
+        calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax",
+        nodes_to_quantize: Optional[List[str]] = None,
+        nodes_to_exclude: Optional[List[str]] = None,
+        op_types_to_quantize: Optional[List[str]] = None,
+        extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None,
+    ):
+        extra_options = extra_options or {}
+        if isinstance(extra_options, dict):
+            extra_options = ExtraOptions(**extra_options)
+        # to_diff_dict() keeps only non-default options, so activation_symmetric is absent unless it is already True.
+        extra_options_dict = extra_options.to_diff_dict()
+        extra_options_dict["activation_symmetric"] = extra_options_dict.get("activation_symmetric", True)
+        # The list arguments are copied below so returned configs never share list objects with callers or each other.
+        return QuantizationConfig(
+            format="qdq",
+            calibration_method=calibrate_method,
+            activations_dtype="int8",
+            weights_dtype="int8",
+            op_types_to_quantize=list(op_types_to_quantize or []),
+            nodes_to_quantize=list(nodes_to_quantize or []),
+            nodes_to_exclude=list(nodes_to_exclude or []),
+            extra_options=ExtraOptions(**extra_options_dict),
+        )
 
     @staticmethod
     def cpu_cnn_config(
-        use_symmetric_activations: bool = False,
-        use_symmetric_weights: bool = True,
-        enable_dpu: bool = False,
+        calibrate_method: Literal["nonoverflow", "mse", "minmax", "entropy", "percentile"] = "minmax",
+        nodes_to_quantize: Optional[List[str]] = None,
+        nodes_to_exclude: Optional[List[str]] = None,
+        op_types_to_quantize: Optional[List[str]] = None,
+        extra_options: Optional[Union[Dict[str, bool], ExtraOptions]] = None,
     ):
+        extra_options = extra_options or {}
+        if isinstance(extra_options, dict):
+            extra_options = ExtraOptions(**extra_options)
+        # The list arguments are copied below so returned configs never share list objects with callers or each other.
         return QuantizationConfig(
-            format=QuantFormat.QDQ,
-            calibration_method=vai_q_onnx.CalibrationMethod.MinMax,
-            activations_dtype=QuantType.QUInt8,
-            activations_symmetric=use_symmetric_activations,
-            weights_dtype=QuantType.QInt8,
-            weights_symmetric=use_symmetric_weights,
-            enable_dpu=enable_dpu,
+            format="qdq",
+            calibration_method=calibrate_method,
+            activations_dtype="uint8",
+            weights_dtype="int8",
+            op_types_to_quantize=list(op_types_to_quantize or []),
+            nodes_to_quantize=list(nodes_to_quantize or []),
+            nodes_to_exclude=list(nodes_to_exclude or []),
+            extra_options=extra_options,
         )
 
 
@@ -122,7 +561,7 @@ def __init__(
     ):
         super().__init__()
         self.opset = opset
-        self.quantization = self.dataclass_to_dict(quantization)
+        self.quantization = quantization.to_dict() if quantization is not None else None
         self.optimum_version = kwargs.pop("optimum_version", None)
 
     @staticmethod
diff --git a/optimum/amd/ryzenai/quantization.py b/optimum/amd/ryzenai/quantization.py
index 28fc5621..92aadaf5 100644
--- a/optimum/amd/ryzenai/quantization.py
+++ b/optimum/amd/ryzenai/quantization.py
@@ -163,19 +163,35 @@ def quantize(
         quantized_model_path = save_dir.joinpath(f"{self.onnx_model_path.stem}{suffix}").with_suffix(".onnx")
 
         LOGGER.info("Quantizing model...")
+
         quantize_static(
             model_input=Path(self.onnx_model_path).as_posix(),
             model_output=quantized_model_path.as_posix(),
             calibration_data_reader=reader,
             quant_format=quantization_config.format,
             calibrate_method=quantization_config.calibration_method,
-            weight_type=quantization_config.weights_dtype,
+            input_nodes=quantization_config.input_nodes,
+            output_nodes=quantization_config.output_nodes,
+            op_types_to_quantize=quantization_config.op_types_to_quantize,
+            random_data_reader_input_shape=quantization_config.random_data_reader_input_shape,
+            per_channel=quantization_config.per_channel,
+            reduce_range=quantization_config.reduce_range,
             activation_type=quantization_config.activations_dtype,
-            enable_dpu=quantization_config.enable_dpu,
-            extra_options={
-                "WeightSymmetric": quantization_config.weights_symmetric,
-                "ActivationSymmetric": quantization_config.activations_symmetric,
-            },
+            weight_type=quantization_config.weights_dtype,
+            nodes_to_quantize=quantization_config.nodes_to_quantize,
+            nodes_to_exclude=quantization_config.nodes_to_exclude,
+            optimize_model=quantization_config.optimize_model,
+            use_external_data_format=quantization_config.use_external_data_format,
+            execution_providers=quantization_config.execution_providers,
+            enable_ipu_cnn=quantization_config.enable_ipu_cnn,
+            convert_fp16_to_fp32=quantization_config.convert_fp16_to_fp32,
+            convert_nchw_to_nhwc=quantization_config.convert_nchw_to_nhwc,
+            include_cle=quantization_config.include_cle,
+            extra_options=(
+                quantization_config.extra_options.to_diff_dict(camel_case=True)
+                if quantization_config.extra_options
+                else {}
+            ),
         )
 
         LOGGER.info(f"Saved quantized model at: {save_dir}")
diff --git a/tests/ryzenai/test_configuration.py b/tests/ryzenai/test_configuration.py
new file mode 100644
index 00000000..29382ceb
--- /dev/null
+++ b/tests/ryzenai/test_configuration.py
@@ -0,0 +1,169 @@
+import unittest
+
+import vai_q_onnx
+from parameterized import parameterized
+
+from optimum.amd.ryzenai import AutoQuantizationConfig, ExtraOptions, QuantizationConfig
+
+
+class TestExtraOptions(unittest.TestCase):  # Checks ExtraOptions defaults, name mapping and attribute assignment.
+    def test_default_values(self):  # A bare ExtraOptions() must expose exactly the defaults asserted below.
+        options = ExtraOptions()
+        self.assertEqual(options.activation_symmetric, False)
+        self.assertEqual(options.weight_symmetric, True)
+        self.assertEqual(options.use_unsigned_relu, False)
+        self.assertEqual(options.quantize_bias, True)
+        self.assertEqual(options.remove_input_init, True)
+        self.assertEqual(options.enable_subgraph, False)
+        self.assertEqual(options.force_quantize_no_input_check, False)
+        self.assertEqual(options.matmul_const_b_only, False)
+        self.assertEqual(options.add_qdq_pair_to_weight, False)
+        self.assertEqual(options.op_types_to_exclude_output_quantization, [])
+        self.assertEqual(options.dedicated_qdq_pair, False)
+        self.assertEqual(options.qdq_op_type_per_channel_support_to_axis, {})
+        self.assertEqual(options.use_qdq_vitis_custom_ops, True)
+        self.assertEqual(options.calib_tensor_range_symmetric, False)
+        self.assertEqual(options.calib_moving_average, False)
+        self.assertEqual(options.calib_moving_average_constant, 0.01)
+        self.assertEqual(options.random_data_reader_input_data_range, None)
+        self.assertEqual(options.int16_scale, False)
+        self.assertEqual(options.min_mse_mode, "All")
+        self.assertEqual(options.convert_bn_to_conv, True)
+        self.assertEqual(options.convert_reduce_mean_to_global_avg_pool, True)
+        self.assertEqual(options.split_large_kernel_pool, True)
+        self.assertEqual(options.convert_split_to_slice, True)
+        self.assertEqual(options.fuse_instance_norm, False)
+        self.assertEqual(options.fuse_l2_norm, False)
+        self.assertEqual(options.convert_clip_to_relu, False)
+        self.assertEqual(options.simulate_dpu, True)
+        self.assertEqual(options.convert_leaky_relu_to_dpu_version, True)
+        self.assertEqual(options.convert_sigmoid_to_hard_sigmoid, True)
+        self.assertEqual(options.convert_hard_sigmoid_to_dpu_version, True)
+        self.assertEqual(options.convert_avg_pool_to_dpu_version, True)
+        self.assertEqual(options.convert_reduce_mean_to_dpu_version, True)
+        self.assertEqual(options.convert_softmax_to_dpu_version, False)
+        self.assertEqual(options.ipu_limitation_check, True)
+        self.assertEqual(options.adjust_shift_cut, True)
+        self.assertEqual(options.adjust_shift_bias, True)
+        self.assertEqual(options.adjust_shift_read, True)
+        self.assertEqual(options.adjust_shift_write, True)
+        self.assertEqual(options.adjust_hard_sigmoid, True)
+        self.assertEqual(options.adjust_shift_swish, True)
+        self.assertEqual(options.align_concat, True)
+        self.assertEqual(options.align_pool, True)
+        self.assertEqual(options.replace_clip6_relu, False)
+        self.assertEqual(options.cle_steps, 1)
+        self.assertEqual(options.cle_total_layer_diff_threshold, 2e-7)
+        self.assertEqual(options.cle_scale_append_bias, True)
+        self.assertEqual(options.remove_qdq_conv_leaky_relu, False)
+        self.assertEqual(options.remove_qdq_conv_prelu, False)
+
+    def test_snake_to_camel(self):  # Acronyms (QDQ, IPU, CLE) must stay upper-case in the camel-case names.
+        options = ExtraOptions()
+        camel_case_dict = options.snake_to_camel
+        self.assertEqual(
+            camel_case_dict["qdq_op_type_per_channel_support_to_axis"], "QDQOpTypePerChannelSupportToAxis"
+        )
+        self.assertEqual(camel_case_dict["ipu_limitation_check"], "IPULimitationCheck")
+        self.assertEqual(camel_case_dict["cle_steps"], "CLESteps")
+        self.assertEqual(camel_case_dict["cle_total_layer_diff_threshold"], "CLETotalLayerDiffThreshold")
+        self.assertEqual(camel_case_dict["cle_scale_append_bias"], "CLEScaleAppendBias")
+
+    def test_camel_to_snake_setattr(self):  # Assigning via a camel-case name must update the snake_case attribute.
+        options = ExtraOptions()
+        options.QDQOpTypePerChannelSupportToAxis = "some_value"
+        options.IPULimitationCheck = False
+        options.CLESteps = 5
+        options.CLETotalLayerDiffThreshold = 1e-7
+        options.CLEScaleAppendBias = False
+        self.assertEqual(options.qdq_op_type_per_channel_support_to_axis, "some_value")
+        self.assertFalse(options.ipu_limitation_check)
+        self.assertEqual(options.cle_steps, 5)
+        self.assertEqual(options.cle_total_layer_diff_threshold, 1e-7)
+        self.assertFalse(options.cle_scale_append_bias)
+
+    def test_to_diff_dict(self):  # to_diff_dict() must report only the option that differs from its default.
+        options = ExtraOptions(activation_symmetric=True)
+        diff_dict = options.to_diff_dict()
+        self.assertEqual(diff_dict, {"activation_symmetric": True})
+
+    @parameterized.expand(
+        [
+            ("activation_symmetric", False, True),
+            ("weight_symmetric", True, False),
+            ("use_unsigned_relu", False, True),
+        ]
+    )
+    def test_parametric_setting_attributes(self, attribute, default_value, new_value):  # default first, then override
+        options = ExtraOptions()
+        self.assertEqual(getattr(options, attribute), default_value)
+        setattr(options, attribute, new_value)
+        self.assertEqual(getattr(options, attribute), new_value)
+
+
+class TestQuantizationConfig(unittest.TestCase):  # Checks QuantizationConfig defaults and attribute assignment.
+    def test_default_values(self):  # A bare QuantizationConfig() must expose exactly the defaults asserted below.
+        config = QuantizationConfig()
+        self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ)
+        self.assertEqual(config.calibration_method, vai_q_onnx.PowerOfTwoMethod.MinMSE)
+        self.assertEqual(config.input_nodes, [])
+        self.assertEqual(config.output_nodes, [])
+        self.assertEqual(config.op_types_to_quantize, [])
+        self.assertEqual(config.random_data_reader_input_shape, [])
+        self.assertFalse(config.per_channel)
+        self.assertFalse(config.reduce_range)
+        self.assertEqual(config.activations_dtype, vai_q_onnx.QuantType.QUInt8)
+        self.assertEqual(config.weights_dtype, vai_q_onnx.QuantType.QInt8)
+        self.assertEqual(config.nodes_to_quantize, [])
+        self.assertEqual(config.nodes_to_exclude, [])
+        self.assertTrue(config.optimize_model)
+        self.assertFalse(config.use_external_data_format)
+        self.assertEqual(config.execution_providers, ["CPUExecutionProvider"])
+        self.assertFalse(config.enable_ipu_cnn)
+        self.assertFalse(config.convert_fp16_to_fp32)
+        self.assertFalse(config.convert_nchw_to_nhwc)
+        self.assertFalse(config.include_cle)
+        self.assertIsInstance(config.extra_options, ExtraOptions)
+
+    def test_extra_options_initialization(self):  # Options passed in must be readable via config.extra_options.
+        extra_options = ExtraOptions(activation_symmetric=True)
+        config = QuantizationConfig(extra_options=extra_options)
+        self.assertEqual(config.extra_options.activation_symmetric, True)
+
+    def test_use_symmetric_calibration(self):  # Symmetric activations plus symmetric weights must set the flag.
+        config = QuantizationConfig(extra_options=ExtraOptions(activation_symmetric=True, weight_symmetric=True))
+        self.assertTrue(config.use_symmetric_calibration)
+
+    @parameterized.expand(
+        [
+            ("format", vai_q_onnx.QuantFormat.QDQ, vai_q_onnx.QuantFormat.QOperator),
+            ("calibration_method", vai_q_onnx.PowerOfTwoMethod.MinMSE, vai_q_onnx.CalibrationMethod.Entropy),
+            ("activations_dtype", vai_q_onnx.QuantType.QUInt8, vai_q_onnx.QuantType.QInt8),
+            ("weights_dtype", vai_q_onnx.QuantType.QInt8, vai_q_onnx.QuantType.QUInt8),
+        ]
+    )
+    def test_parametric_setting_attributes(self, attribute, default_value, new_value):  # default first, then override
+        config = QuantizationConfig()
+        self.assertEqual(getattr(config, attribute), default_value)
+        setattr(config, attribute, new_value)
+        self.assertEqual(getattr(config, attribute), new_value)
+
+
+class TestAutoQuantizationConfig(unittest.TestCase):  # Smoke tests for the AutoQuantizationConfig factory methods.
+    def test_ipu_cnn_config(self):  # Expects QDQ format, MinMSE calibration and symmetric activations.
+        config = AutoQuantizationConfig.ipu_cnn_config()
+        self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ)
+        self.assertEqual(config.calibration_method, vai_q_onnx.PowerOfTwoMethod.MinMSE)
+        self.assertTrue(config.extra_options.activation_symmetric)
+
+    def test_ipu_transformer_config(self):  # Expects QDQ format, MinMax calibration and symmetric activations.
+        config = AutoQuantizationConfig.ipu_transformer_config()
+        self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ)
+        self.assertEqual(config.calibration_method, vai_q_onnx.CalibrationMethod.MinMax)
+        self.assertTrue(config.extra_options.activation_symmetric)
+
+    def test_cpu_cnn_config(self):  # Expects QDQ format with MinMax calibration.
+        config = AutoQuantizationConfig.cpu_cnn_config()
+
+        self.assertEqual(config.format, vai_q_onnx.QuantFormat.QDQ)
+        self.assertEqual(config.calibration_method, vai_q_onnx.CalibrationMethod.MinMax)