diff --git a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst
index eddde03eb6bb4d..ce792da5684e2a 100644
--- a/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst
+++ b/docs/articles_en/openvino-workflow/model-optimization-guide/quantizing-models-post-training/quantizing-with-accuracy-control.rst
@@ -14,7 +14,7 @@ This is the advanced quantization flow that allows to apply 8-bit quantization t
 * Since accuracy validation is run several times during the quantization process, quantization with accuracy control can take more time than the :doc:`Basic 8-bit quantization ` flow.
 * The resulting model can provide a smaller performance improvement than the :doc:`Basic 8-bit quantization ` flow because some of the operations are kept in the original precision.
 
-.. note:: Currently, 8-bit quantization with accuracy control is available only for models in OpenVINO representation.
+.. note:: Currently, 8-bit quantization with accuracy control is available only for models in OpenVINO and onnx.ModelProto representations.
 
 The steps for the quantization with accuracy control are described below.
 
@@ -38,10 +38,18 @@ This step is similar to the :doc:`Basic 8-bit quantization `
 * `Post-Training Quantization of YOLOv8 OpenVINO Model with control of accuracy metric `__
+* `Post-Training Quantization of YOLOv8 ONNX Model with control of accuracy metric `__
 
 See also
 ####################
diff --git a/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py b/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py
new file mode 100644
index 00000000000000..dd32cbdbf1d6bd
--- /dev/null
+++ b/docs/optimization_guide/nncf/ptq/code/ptq_aa_onnx.py
@@ -0,0 +1,75 @@
+# Copyright (C) 2018-2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+#! [dataset]
+import nncf
+import torch
+
+calibration_loader = torch.utils.data.DataLoader(...)
+
+def transform_fn(data_item):
+    images, _ = data_item
+    return {input_name: images.numpy()}  # input_name should be taken from the model,
+                                         # e.g. model.graph.input[0].name
+
+calibration_dataset = nncf.Dataset(calibration_loader, transform_fn)
+validation_dataset = nncf.Dataset(calibration_loader, transform_fn)
+#! [dataset]
+
+#! [validation]
+import numpy as np
+import torch
+from sklearn.metrics import accuracy_score
+
+import onnx
+import onnxruntime
+
+
+def validate(model: onnx.ModelProto,
+             validation_loader: torch.utils.data.DataLoader) -> float:
+    predictions = []
+    references = []
+
+    input_name = model.graph.input[0].name
+    serialized_model = model.SerializeToString()
+    session = onnxruntime.InferenceSession(serialized_model, providers=["CPUExecutionProvider"])
+    output_names = [output.name for output in session.get_outputs()]
+
+    for images, target in validation_loader:
+        pred = session.run(output_names, input_feed={input_name: images.numpy()})[0]
+        predictions.append(np.argmax(pred, axis=1))
+        references.append(target)
+
+    predictions = np.concatenate(predictions, axis=0)
+    references = np.concatenate(references, axis=0)
+    return accuracy_score(predictions, references)
+#! [validation]
+
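+# Optional sanity check, a minimal sketch: validating the original FP32 model
+# gives the baseline accuracy that max_drop (used in the next snippet) is
+# measured against. "model_path" is a placeholder, as in the next snippet.
+fp32_model = onnx.load("model_path")
+baseline_accuracy = validate(fp32_model, calibration_loader)
+print(f"FP32 accuracy: {baseline_accuracy:.4f}")
+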
+#! [quantization]
+import onnx
+
+model = onnx.load("model_path")
+
+quantized_model = nncf.quantize_with_accuracy_control(
+    model,
+    calibration_dataset=calibration_dataset,
+    validation_dataset=validation_dataset,
+    validation_fn=validate,
+    max_drop=0.01,
+    drop_type=nncf.DropType.ABSOLUTE,
+)
+#! [quantization]
+
+#! [inference]
+import openvino as ov
+
+# convert ONNX model to OpenVINO model
+ov_quantized_model = ov.convert_model(quantized_model)
+
+# compile the model to transform quantized operations to int8
+model_int8 = ov.compile_model(ov_quantized_model)
+
+input_fp32 = ...  # FP32 model input
+res = model_int8(input_fp32)
+
+#! [inference]
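+
+# A minimal sketch of persisting the results; the file names are placeholders.
+# onnx.save stores the quantized ONNX model, ov.save_model the OpenVINO IR.
+onnx.save(quantized_model, "quantized_model.onnx")
+ov.save_model(ov_quantized_model, "quantized_model_int8.xml")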