diff --git a/mobile_back_apple/.gitignore b/mobile_back_apple/.gitignore
index c7058d2ef..0fbca8bfc 100644
--- a/mobile_back_apple/.gitignore
+++ b/mobile_back_apple/.gitignore
@@ -1,4 +1,5 @@
 dev-resources
+models/*.mlpackage
 
 # Created by https://www.toptal.com/developers/gitignore/api/xcode
 # Edit at https://www.toptal.com/developers/gitignore?templates=xcode
diff --git a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
index 758d675cc..1e2bea519 100644
--- a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
+++ b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
@@ -65,22 +65,22 @@ benchmark_setting {
     delegate_name: "CPU & GPU & ANE"
     accelerator_name: "cpu&gpu&ane"
     accelerator_desc: "All compute units"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & GPU"
     accelerator_name: "cpu&gpu"
     accelerator_desc: "CPU and GPU"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & ANE"
     accelerator_name: "cpu&ane"
     accelerator_desc: "CPU and Neural Engine"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_selected: "CPU & GPU & ANE"
 }
@@ -93,24 +93,24 @@ benchmark_setting {
     accelerator_name: "cpu&gpu&ane"
     accelerator_desc: "All compute units"
     batch_size: 32
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & GPU"
     accelerator_name: "cpu&gpu"
     accelerator_desc: "CPU and GPU"
     batch_size: 32
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & ANE"
     accelerator_name: "cpu&ane"
     accelerator_desc: "CPU and Neural Engine"
     batch_size: 32
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_selected: "CPU & GPU & ANE"
 }
diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc
index a297a46af..a2246345f 100644
--- a/mobile_back_apple/cpp/backend_coreml/main.cc
+++ b/mobile_back_apple/cpp/backend_coreml/main.cc
@@ -28,6 +28,7 @@ struct CoreMLBackendData {
   const char *vendor = "Apple";
   const char *accelerator{nullptr};
   CoreMLExecutor *coreMLExecutor{nullptr};
+  bool expectNCHW = false;
 };
 
 inline mlperf_data_t::Type MLMultiArrayDataType2MLPerfDataType(
@@ -45,6 +46,24 @@ inline mlperf_data_t::Type MLMultiArrayDataType2MLPerfDataType(
 
 static bool backendExists = false;
 
+template <typename T>
+void convert_nhwc_to_nchw(T *data_nhwc, int N, int H, int W, int C) {
+  T *data_nchw = new T[N * C * H * W];
+  for (int n = 0; n < N; ++n) {
+    for (int c = 0; c < C; ++c) {
+      for (int h = 0; h < H; ++h) {
+        for (int w = 0; w < W; ++w) {
+          int index_nchw = ((n * C + c) * H + h) * W + w;
+          int index_nhwc = ((n * H + h) * W + w) * C + c;
+          data_nchw[index_nchw] = data_nhwc[index_nhwc];
+        }
+      }
+    }
+  }
+  std::memcpy(data_nhwc, data_nchw, N * H * W * C * sizeof(T));
+  delete[] data_nchw;
+}
+
 // Return the name of the backend
 const char *mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) {
   return ((CoreMLBackendData *)backend_ptr)->vendor;
@@ -82,6 +101,11 @@ mlperf_backend_ptr_t mlperf_backend_create(
 
   CoreMLBackendData *backend_data = new CoreMLBackendData();
   backendExists = true;
+  // quick hack for checking if model expects NCHW input.
+  if (strcasestr(model_path, "NCHW") != nullptr) {
+    backend_data->expectNCHW = true;
+    LOG(INFO) << "Will convert inputs from NHWC to NCHW!";
+  }
 
   // Load the model.
   NSError *error;
@@ -184,3 +208,12 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr,
     return MLPERF_SUCCESS;
   return MLPERF_FAILURE;
 }
+
+void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes,
+                                   int width, int height, uint8_t *data) {
+  CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr;
+  if (backend_data->expectNCHW) {
+    int N = 1, H = height, W = width, C = 3;
+    convert_nhwc_to_nchw(reinterpret_cast<float *>(data), N, H, W, C);
+  }
+}
diff --git a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile
index bbe7cbbcb..6bdcb31c3 100644
--- a/mobile_back_apple/dev-utils/Makefile
+++ b/mobile_back_apple/dev-utils/Makefile
@@ -158,10 +158,10 @@ coreml-run-ic-offline:
 
 coreml-run-ic-v2:
 	cd ${REPO_ROOT_DIR} && \
-	bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification \
+	bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_v2 \
 	--mode=PerformanceOnly \
 	--output_dir="${REPO_ROOT_DIR}/output" \
-	--model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/MobilenetV4_Large.mlmodel" \
+	--model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/mobilenetv4_fp32_NCHW.mlpackage" \
 	--image_width=384 \
 	--image_height=384 \
 	--lib_path="bazel-bin/mobile_back_apple/cpp/backend_coreml/libcoremlbackend.so" \
@@ -171,7 +171,7 @@ coreml-run-ic-v2:
 
 coreml-run-ic-offline-v2:
 	cd ${REPO_ROOT_DIR} && \
-	bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_offline \
+	bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_offline_v2 \
 	--mode=PerformanceOnly \
 	--scenario=Offline \
 	--batch_size=32 \
diff --git a/mobile_back_apple/models/mobilenetv4_pytorch.py b/mobile_back_apple/models/mobilenetv4_pytorch.py
new file mode 100644
index 000000000..6e591562c
--- /dev/null
+++ b/mobile_back_apple/models/mobilenetv4_pytorch.py
@@ -0,0 +1,194 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+# Copyright 2024 The MLPerf Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Tested with torch==2.3.1, coremltools==8.0b1, timm==1.0.7, macOS 14.5, XCode 16.0 beta
+
+import os
+import timm
+import torch
+import numpy as np
+import coremltools as ct
+import coremltools.optimize.coreml as cto
+from torchvision.transforms import v2
+
+# The following API is for coremltools==8.0b1
+# It will be moved out of "experimental" in later versions of coremltools
+from coremltools.optimize.coreml.experimental import OpActivationLinearQuantizerConfig, \
+    linear_quantize_activations
+from PIL import Image
+
+C = 3
+H = 384
+W = 384
+
+INPUT_NAME = 'images'
+OUTPUT_NAME = 'softmax'
+MODEL_NAME = 'hf-hub:timm/mobilenetv4_conv_large.e600_r384_in1k'
+
+MLMODEL_FILE_FP32 = 'mobilenetv4_fp32.mlpackage'
+MLMODEL_FILE_W8 = "mobilenetv4_w8.mlpackage"
+MLMODEL_FILE_W8A8 = "mobilenetv4_w8a8.mlpackage"
+
+IMAGE_DIR = './imagenet'
+LABELS_FILE = 'imagenet_val_full.txt'
+
+
+def load_labels(labels_file: str) -> list[str]:
+    with open(labels_file, 'r') as f:
+        lines = f.readlines()
+    return lines
+
+
+def load_dummy_images(count: int = 9) -> list[Image]:
+    images = []
+    for _ in range(count):
+        dummy_image = np.random.randint(0, 256, (H, W, C), dtype=np.uint8)
+        images.append(Image.fromarray(dummy_image))
+    return images
+
+
+def load_images_from_folder(folder: str, max_images: int = None) -> list[Image]:
+    images = []
+    filenames = os.listdir(folder)
+    filenames.sort()
+    if max_images is not None and len(filenames) > max_images:
+        filenames = filenames[:max_images]
+    for filename in filenames:
+        if filename.lower().endswith((".jpg", ".jpeg", ".png")):
+            img_path = os.path.join(folder, filename)
+            img = Image.open(img_path).convert('RGB')
+            images.append(img)
+            print(f'Loaded: {filename}')
+    print(f'Loaded {len(images)} images from {folder}')
+    return images
+
+
+def preprocess_images(pil_images: list[Image]) -> list[dict]:
+    # mean and std for ImageNet
+    mean = [0.485, 0.456, 0.406]
+    std = [0.229, 0.224, 0.225]
+    transform = v2.Compose([
+        v2.ToImage(),
+        v2.ToDtype(torch.uint8, scale=True),
+        v2.CenterCrop(size=(H, W)),
+        v2.ToDtype(torch.float32, scale=True),
+        v2.Normalize(mean, std)
+    ])
+    transformed_images = transform(pil_images)
+    data = []
+    for image in transformed_images:
+        img_np = image.numpy()
+        img_np = img_np.reshape(1, C, H, W)
+        assert (img_np.shape == (1, C, H, W))
+        data.append({INPUT_NAME: img_np})
+    return data
+
+
+def quantize_weights(mlmodel: ct.models.MLModel) -> ct.models.MLModel:
+    # quantize weights to 8 bits
+    weight_quant_op_config = cto.OpLinearQuantizerConfig(mode="linear_symmetric",
+                                                         dtype="int8")
+    weight_quant_model_config = cto.OptimizationConfig(weight_quant_op_config)
+    mlmodel_quantized = cto.linear_quantize_weights(mlmodel,
+                                                    weight_quant_model_config)
+    print('Weights quantization finished.')
+    return mlmodel_quantized
+
+
+def quantize_activations(mlmodel: ct.models.MLModel, sample_data: list[dict]) -> ct.models.MLModel:
+    # quantize activations to 8 bits
+    act_quant_op_config = OpActivationLinearQuantizerConfig(mode="linear_symmetric",
+                                                            dtype="int8")
+    act_quant_model_config = cto.OptimizationConfig(global_config=act_quant_op_config)
+    mlmodel_quantized = linear_quantize_activations(mlmodel,
+                                                    act_quant_model_config,
+                                                    sample_data=sample_data)
+    print('Activations quantization finished.')
+    return mlmodel_quantized
+
+
+def convert_model():
+    # Load the pretrained model
+    torch_model = timm.create_model(MODEL_NAME, pretrained=True)
+    torch_model.eval()
+
+    # Inspect the model
+    print("num_classes", torch_model.num_classes)
+    print("data_config", timm.data.resolve_model_data_config(torch_model))
+
+    # Trace the model with random data
+    example_input = torch.rand(1, C, H, W)
+    traced_model = torch.jit.trace(torch_model, example_input)
+    _ = traced_model(example_input)
+
+    # Convert the traced model to CoreML
+    ml_model = ct.convert(
+        traced_model,
+        convert_to="mlprogram",
+        inputs=[ct.TensorType(name=INPUT_NAME, shape=example_input.shape)],
+        outputs=[ct.TensorType(name=OUTPUT_NAME)],
+        # minimum_deployment_target=ct.target.iOS18
+    )
+
+    ml_model.short_description = MODEL_NAME
+
+    ml_model.save(MLMODEL_FILE_FP32)
+    print('Model converted from PyTorch to Core ML.')
+
+    mlmodel_quantized = quantize_weights(ml_model)
+    mlmodel_quantized.save(MLMODEL_FILE_W8)
+
+    # pil_images = load_dummy_images(count=9)
+    pil_images = load_images_from_folder(IMAGE_DIR, max_images=999)
+    sample_data = preprocess_images(pil_images)
+    mlmodel_quantized = quantize_activations(mlmodel_quantized, sample_data)
+    mlmodel_quantized.save(MLMODEL_FILE_W8A8)
+
+
+def test_accuracy(mlmodel_file: str):
+    expected_labels = load_labels(LABELS_FILE)
+    pil_images = load_images_from_folder(IMAGE_DIR)
+    mlmodel = ct.models.MLModel(mlmodel_file)
+    batch_size = 999
+    correct_predictions = 0
+    total_predictions = 0
+    total_images = len(pil_images)
+    for i in range(0, len(pil_images), batch_size):
+        batch_images = pil_images[i:i + batch_size]
+        image_data = preprocess_images(batch_images)
+        predictions = mlmodel.predict(image_data)
+        assert (len(predictions) == len(image_data))
+        for j in range(len(image_data)):
+            total_predictions += 1
+            predicted_label = np.argmax(predictions[j][OUTPUT_NAME])
+            expected_label = int(expected_labels[i + j])
+            if predicted_label == expected_label:
+                correct_predictions += 1
+            moving_accuracy = correct_predictions / total_predictions
+            print(f'Moving Accuracy: {moving_accuracy * 100:.2f}%. Images processed: {total_predictions}/{total_images}.')
+    assert (total_predictions == len(pil_images))
+    accuracy = correct_predictions / total_predictions
+    print(f'Accuracy: {accuracy * 100:.2f}%. Images processed: {total_predictions}/{total_images}.')
+
+
+def main():
+    convert_model()
+    test_accuracy(mlmodel_file=MLMODEL_FILE_W8A8)
+
+
+if __name__ == "__main__":
+    main()
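
Note: the snippet below is an illustrative sketch and not part of the patch. It sanity-checks the NHWC-to-NCHW index arithmetic used by the new convert_nhwc_to_nchw helper in main.cc against numpy.transpose. A small shape is used so the check runs quickly; the actual MobileNetV4 input is 1x384x384x3. It assumes only that NumPy is available.

import numpy as np

# Small shape for a fast check; the benchmark input is N=1, H=W=384, C=3.
N, H, W, C = 1, 4, 4, 3
nhwc = np.random.rand(N, H, W, C).astype(np.float32)

# Same index mapping as the C++ loop: a flat NHWC buffer rewritten into NCHW order.
src = nhwc.reshape(-1)
dst = np.empty_like(src)
for n in range(N):
    for c in range(C):
        for h in range(H):
            for w in range(W):
                dst[((n * C + c) * H + h) * W + w] = src[((n * H + h) * W + w) * C + c]

# The loop must agree with a plain NHWC -> NCHW transpose.
assert np.array_equal(dst.reshape(N, C, H, W), nhwc.transpose(0, 3, 1, 2))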