microsoft · xieofxie · Mar 4, 2025 · Mar 6, 2025
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/imagenet.py b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/imagenet.py
@@ -0,0 +1,99 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+# --------------------------------------------------------------------------
+from itertools import islice
+from logging import getLogger
+from pathlib import Path
+
+import numpy as np
+import torchvision.transforms as transforms
+from torch import from_numpy
+from torch.utils.data import Dataset
+
+from olive.data.registry import Registry
+
+logger = getLogger(__name__)
+
+
+class ImagenetDataset(Dataset):
+    """Map-style dataset over pre-processed images and their labels."""
+
+    def __init__(self, data):
+        """Wrap the ``"images"`` and ``"labels"`` NumPy arrays of *data* as tensors.
+
+        *data* is any mapping exposing those two keys, e.g. a plain dict or an
+        opened ``.npz`` archive.
+        """
+        self.images = from_numpy(data["images"])
+        self.labels = from_numpy(data["labels"])
+
+    def __len__(self):
+        # Use the shorter of the two so every index has both an image and a label.
+        return min(len(self.images), len(self.labels))
+
+    def __getitem__(self, idx):
+        """Return ``({"input": image}, label)`` for the sample at *idx*."""
+        return {"input": self.images[idx]}, self.labels[idx]
+
+
+@Registry.register_post_process()
+def imagenet_post_fun(output):
+    """Return the index of the largest value along axis 1 of *output*."""
+    return output.argmax(axis=1)
+
+
+# Resize -> center crop -> tensor conversion -> per-channel normalization.
+preprocess = transforms.Compose(
+    [
+        transforms.Resize(256),
+        transforms.CenterCrop(224),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+    ]
+)
+
+
+@Registry.register_pre_process()
+def dataset_pre_process(output_data, **kwargs):
+    """Build an ``ImagenetDataset`` from the first ``size`` samples of *output_data*.
+
+    Keyword arguments:
+        cache_key: optional name; when given, the processed arrays are cached in
+            ``./cache/data/{cache_key}_{size}.npz`` and reused on later calls.
+        size: maximum number of samples to process (default 256).
+
+    Each sample must provide an ``"image"`` (converted to RGB and run through
+    ``preprocess``) and a ``"label"``.
+    """
+    cache_key = kwargs.get("cache_key")
+    size = kwargs.get("size", 256)
+    cache_file = None
+    if cache_key:
+        cache_file = Path(f"./cache/data/{cache_key}_{size}.npz")
+        if cache_file.exists():
+            logger.info("Loading pre-processed samples from %s", cache_file)
+            # The arrays are read inside the ``with`` block, before the archive closes.
+            with np.load(cache_file) as data:
+                return ImagenetDataset(data)
+
+    labels = []
+    images = []
+    # islice stops after ``size`` samples without pulling an extra one from a
+    # streaming source; a negative size yields no samples, as before.
+    for sample in islice(output_data, max(size, 0)):
+        image = sample["image"].convert("RGB")
+        # ``preprocess`` returns a tensor; store ndarrays so np.array stacks plain arrays.
+        images.append(preprocess(image).numpy())
+        labels.append(sample["label"])
+
+    # Convert once and share the arrays between the dataset and the cache file.
+    image_array = np.array(images)
+    label_array = np.array(labels)
+
+    if cache_file:
+        cache_file.parent.resolve().mkdir(parents=True, exist_ok=True)
+        np.savez(cache_file, images=image_array, labels=label_array)
+        logger.info("Cached %d pre-processed samples in %s", len(label_array), cache_file)
+
+    return ImagenetDataset({"images": image_array, "labels": label_array})
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/modelspace.config b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/modelspace.config
@@ -0,0 +1,8 @@
+{
+    "models":[
+        {
+            "name": "Convert to QNN",
+            "file": "resnet_ptq_qnn.json"
+        }
+    ]
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/requirements.txt b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/requirements.txt
@@ -0,0 +1 @@
+olive-ai
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json
@@ -0,0 +1,131 @@
+{
+    "input_model": {
+        "type": "HfModel",
+        "model_path": "microsoft/resnet-50",
+        "task": "image-classification",
+        "io_config": {
+            "input_names": [ "input" ],
+            "input_shapes": [ [ 1, 3, 224, 224 ] ],
+            "output_names": [ "output" ]
+        }
+    },
+    "systems": {
+        "qnn_system": {
+            "type": "LocalSystem",
+            "accelerators": [ { "device": "npu", "execution_providers": [ "QNNExecutionProvider" ] } ]
+        }
+    },
+    "data_configs": [
+        {
+            "name": "quantize_data_config",
+            "type": "HuggingfaceContainer",
+            "user_script": "imagenet.py",
+            "load_dataset_config": {
+                "data_name": "imagenet-1k",
+                "split": "validation",
+                "streaming": true,
+                "trust_remote_code": true
+            },
+            "pre_process_data_config": { "type": "dataset_pre_process", "size": 256, "cache_key": "imagenet" },
+            "post_process_data_config": { "type": "imagenet_post_fun" }
+        }
+    ],
+    "evaluators": {
+        "common_evaluator": {
+            "metrics": [
+                {
+                    "name": "accuracy_qnn",
+                    "type": "accuracy",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [
+                        {
+                            "name": "accuracy_score",
+                            "priority": 1,
+                            "metric_config": { "task": "multiclass", "num_classes": 1001 }
+                        }
+                    ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "accuracy_cpu",
+                    "type": "accuracy",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [
+                        {
+                            "name": "accuracy_score",
+                            "priority": 2,
+                            "metric_config": { "task": "multiclass", "num_classes": 1001 }
+                        }
+                    ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                },
+                {
+                    "name": "latency_qnn",
+                    "type": "latency",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [ { "name": "avg", "priority": 3 } ],
+                    "user_config": {
+                        "inference_settings": {
+                            "onnx": {
+                                "session_options": {
+                                    "extra_session_config": { "session.disable_cpu_ep_fallback": "1" }
+                                },
+                                "execution_provider": "QNNExecutionProvider",
+                                "provider_options": [ { "backend_path": "QnnHtp.dll" } ]
+                            }
+                        }
+                    }
+                },
+                {
+                    "name": "latency_cpu",
+                    "type": "latency",
+                    "data_config": "quantize_data_config",
+                    "sub_types": [ { "name": "avg", "priority": 4 } ],
+                    "user_config": {
+                        "inference_settings": { "onnx": { "execution_provider": "CPUExecutionProvider" } }
+                    }
+                }
+            ]
+        }
+    },
+    "passes": {
+        "conversion": {
+            "device": "cpu",
+            "type": "OnnxConversion",
+            "target_opset": 17,
+            "save_as_external_data": true,
+            "all_tensors_to_one_file": true,
+            "dynamic": false,
+            "use_dynamo_exporter": false
+        },
+        "QNNPreprocess": { "type": "QNNPreprocess" },
+        "OnnxQuantization": {
+            "type": "OnnxQuantization",
+            "data_config": "quantize_data_config",
+            "activation_type": "QUInt16",
+            "weight_type": "QUInt8",
+            "calibrate_method": "MinMax",
+            "quant_preprocess": true,
+            "prepare_qnn_config": true
+        }
+    },
+    "host": "qnn_system",
+    "target": "qnn_system",
+    "evaluator": "common_evaluator",
+    "cache_dir": "cache",
+    "clean_cache": true,
+    "output_dir": "models/resnet_ptq_qnn",
+    "evaluate_input_model": false
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.config b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.config
@@ -0,0 +1,12 @@
+{
+    "parameters": [
+        {
+            "path": "passes.OnnxQuantization.weight_type",
+            "template": "WeightType"
+        },
+        {
+            "path": "passes.OnnxQuantization.activation_type",
+            "template": "ActivationType"
+        }
+    ]
+}
diff --git a/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.md b/model_lab_configs/huggingface/microsoft/resnet-50/0.1/resnet_ptq_qnn.json.md
@@ -0,0 +1,11 @@
+# ResNet optimization with QNN execution providers
+
+This example optimizes ResNet with QNN execution providers in a single workflow. The optimization pipeline is:
+
+- *PyTorch Model -> ONNX Model -> QNN Quantized ONNX Model*
+
+## Evaluation result
+
+| Activation Type | Weight Type | Accuracy | Latency (avg) |
+|-----------------|-------------|----------|---------|
+| QUInt16         | QUInt8      |  0.78515625      | 2.53724 ms  |
diff --git a/model_lab_configs/model_list.json b/model_lab_configs/model_list.json
@@ -0,0 +1,12 @@
+{
+    "models": [
+        {
+            "name": "Microsoft/ResNet-50",
+            "icon": "ms",
+            "model_link": "https://huggingface.co/microsoft/resnet-50",
+            "id": "huggingface/microsoft/resnet-50",
+            "runtime": ["QNN"],
+            "architecture": "CNN"
+        }
+    ]
+}
diff --git a/model_lab_configs/parameter_template.json b/model_lab_configs/parameter_template.json
@@ -0,0 +1,14 @@
+{
+    "ActivationType": {
+        "name": "Activation Type",
+        "description": "Quantization data type of activation",
+        "type": "enum",
+        "values": ["QUInt16", "QInt16"]
+    },
+    "WeightType": {
+        "name": "Weight Type",
+        "description": "Data type for quantizing weights",
+        "type": "enum",
+        "values": ["QInt8", "QUInt8"]
+    }
+}