From b816f6a7b2c1d74c0d361e2e5fdd4691a102b401 Mon Sep 17 00:00:00 2001 From: Anh Date: Thu, 27 Jun 2024 22:00:39 +0700 Subject: [PATCH 01/11] add convert_nhwc_to_nchw() --- mobile_back_apple/cpp/backend_coreml/main.cc | 24 ++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index a297a46af..bb9e4e3b7 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -184,3 +184,27 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr, return MLPERF_SUCCESS; return MLPERF_FAILURE; } + +void convert_nhwc_to_nchw(uint8_t *data_nhwc, int N, int H, int W, int C) { + uint8_t *data_nchw = new uint8_t[N * C * H * W]; + for (int n = 0; n < N; ++n) { + for (int c = 0; c < C; ++c) { + for (int h = 0; h < H; ++h) { + for (int w = 0; w < W; ++w) { + int index_nchw = ((n * C + c) * H + h) * W + w; + int index_nhwc = ((n * H + h) * W + w) * C + c; + data_nchw[index_nchw] = data_nhwc[index_nhwc]; + } + } + } + } + std::memcpy(data_nhwc, data_nchw, N * H * W * C * sizeof(uint8_t)); + delete[] data_nchw; +} + +void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, + int width, int height, uint8_t *data) { + CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr; + int N = 1, H = height, W = width, C = 3; + convert_nhwc_to_nchw(data, N, H, W, C); +} From d54e986851bf38f384898022625c675d40bce419 Mon Sep 17 00:00:00 2001 From: Anh Date: Fri, 28 Jun 2024 08:35:58 +0700 Subject: [PATCH 02/11] add test_convert_nhwc_to_nchw() --- mobile_back_apple/cpp/backend_coreml/main.cc | 26 ++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index bb9e4e3b7..63fcac01c 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -202,9 +202,35 @@ void convert_nhwc_to_nchw(uint8_t *data_nhwc, int N, int H, int W, int C) { delete[] data_nchw; } +void test_convert_nhwc_to_nchw() { + const int N = 1, H = 2, W = 2, C = 3; + uint8_t data_nhwc[N * H * W * C] = { + 1, 2, 3, 4, 5, 6, + 7, 8, 9, 10, 11, 12 + }; + uint8_t expected_data_nchw[N * C * H * W] = { + 1, 4, 7, 10, + 2, 5, 8, 11, + 3, 6, 9, 12 + }; + + convert_nhwc_to_nchw(data_nhwc, N, H, W, C); + + for (int i = 0; i < N * C * H * W; ++i) { + if (data_nhwc[i] != expected_data_nchw[i]) { + std::cout << "Test failed at index " << i << ": expected " + << (int)expected_data_nchw[i] << ", got " << (int)data_nhwc[i] + << std::endl; + return; + } + } + std::cout << "Test passed!" 
<< std::endl; +} + void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) { CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr; int N = 1, H = height, W = width, C = 3; convert_nhwc_to_nchw(data, N, H, W, C); + // test_convert_nhwc_to_nchw(); } From cfe56c9c7aa9cfa52fd321a5d57975edec48ad1d Mon Sep 17 00:00:00 2001 From: Anh Date: Tue, 2 Jul 2024 09:44:43 +0700 Subject: [PATCH 03/11] update model path in dev Makefile --- mobile_back_apple/dev-utils/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile index bbe7cbbcb..a20f9ac32 100644 --- a/mobile_back_apple/dev-utils/Makefile +++ b/mobile_back_apple/dev-utils/Makefile @@ -12,7 +12,7 @@ export ANDROID_NDK_VERSION=25 export ANDROID_NDK_API_LEVEL=33 tflite: tflite-build tflite-run-ic-v2 tflite-run-ic-offline-v2 -coreml: coreml-build coreml-run-ic-v2 coreml-run-ic-offline-v2 +coreml: coreml-build coreml-run-ic-v2 tflite-all: tflite-build tflite-run-ic tflite-run-ic-offline tflite-run-od tflite-run-is tflite-run-lu tflite-run-sr tflite-run-ic-v2 tflite-run-ic-offline-v2 coreml-all: coreml-build coreml-run-ic coreml-run-ic-offline coreml-run-od coreml-run-is coreml-run-lu coreml-run-sr coreml-run-ic-v2 coreml-run-ic-offline-v2 @@ -161,7 +161,7 @@ coreml-run-ic-v2: bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification \ --mode=PerformanceOnly \ --output_dir="${REPO_ROOT_DIR}/output" \ - --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/MobilenetV4_Large.mlmodel" \ + --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/mobilenetv4_NCHW.mlpackage" \ --image_width=384 \ --image_height=384 \ --lib_path="bazel-bin/mobile_back_apple/cpp/backend_coreml/libcoremlbackend.so" \ From 72d03c39c2dc3d8fccf538bc1adbfe431db8503d Mon Sep 17 00:00:00 2001 From: Anh Date: Tue, 2 Jul 2024 12:53:47 +0700 Subject: [PATCH 04/11] add quick hack for checking if model expects NCHW input --- mobile_back_apple/cpp/backend_coreml/main.cc | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index 63fcac01c..7fe7860e3 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -28,6 +28,7 @@ struct CoreMLBackendData { const char *vendor = "Apple"; const char *accelerator{nullptr}; CoreMLExecutor *coreMLExecutor{nullptr}; + bool expectNCHW = false; }; inline mlperf_data_t::Type MLMultiArrayDataType2MLPerfDataType( @@ -82,6 +83,10 @@ mlperf_backend_ptr_t mlperf_backend_create( CoreMLBackendData *backend_data = new CoreMLBackendData(); backendExists = true; + // quick hack for checking if model expects NCHW input. + if (strcasestr(model_path, "NCHW") != nullptr) { + backend_data->expectNCHW = true; + } // Load the model. 
NSError *error; @@ -230,7 +235,10 @@ void test_convert_nhwc_to_nchw() { void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, int width, int height, uint8_t *data) { CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr; - int N = 1, H = height, W = width, C = 3; - convert_nhwc_to_nchw(data, N, H, W, C); - // test_convert_nhwc_to_nchw(); + if (backend_data->expectNCHW) { + LOG(INFO) << "Converting inputs from NHWC to NCHW!"; + int N = 1, H = height, W = width, C = 3; + convert_nhwc_to_nchw(data, N, H, W, C); + // test_convert_nhwc_to_nchw(); + } } From 44e6b5a3390713405b717e00dd73fb14b84646c2 Mon Sep 17 00:00:00 2001 From: Anh Date: Wed, 10 Jul 2024 15:28:36 +0700 Subject: [PATCH 05/11] Add a Python script to convert PyTorch MobileNet V4 model to Core ML --- .../models/mobilenetv4_pytorch.py | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 mobile_back_apple/models/mobilenetv4_pytorch.py diff --git a/mobile_back_apple/models/mobilenetv4_pytorch.py b/mobile_back_apple/models/mobilenetv4_pytorch.py new file mode 100644 index 000000000..0f314a1d0 --- /dev/null +++ b/mobile_back_apple/models/mobilenetv4_pytorch.py @@ -0,0 +1,130 @@ +# Tested with torch==2.3.1, coremltools==8.0b1, timm==1.0.7, macOS 14.5, XCode 16.0 beta + +import os +import timm +import torch +import numpy as np +import coremltools as ct +import coremltools.optimize.coreml as cto +from torchvision.transforms import v2 + +# The following API is for coremltools==8.0b1 +# It will be moved out of "experimental" in later versions of coremltools +from coremltools.optimize.coreml.experimental import OpActivationLinearQuantizerConfig, \ + linear_quantize_activations +from PIL import Image + +C = 3 +H = 384 +W = 384 +MODEL_NAME = "hf-hub:timm/mobilenetv4_conv_large.e600_r384_in1k" + +def load_dummy_images(count=9): + # TODO: Replace this with actual loading of images + images = [] + for _ in range(count): + dummy_image = np.random.randint(0, 256, (H, W, C), dtype=np.uint8) + images.append(Image.fromarray(dummy_image)) + return images + + +def load_images_from_folder(folder, max_images=99): + images = [] + filenames = os.listdir(folder) + if len(filenames) > max_images: + filenames = np.random.choice(filenames, max_images, replace=False) + for filename in filenames: + if filename.lower().endswith((".jpg", ".jpeg", ".png")): + img_path = os.path.join(folder, filename) + img = Image.open(img_path).convert('RGB') + img_array = np.array(img) + images.append(img_array) + return images + + +def load_sample_data(): + # sample_images = load_dummy_images(count=9) + folder_path = './imagenet' + sample_images = load_images_from_folder(folder_path, max_images=999) + print(f'Loaded {len(sample_images)} images from {folder_path}') + # mean and std for ImageNet + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + transform = v2.Compose([ + v2.ToImage(), + v2.ToDtype(torch.uint8, scale=True), + v2.CenterCrop(size=(H, W)), + v2.ToDtype(torch.float32, scale=True), + v2.Normalize(mean, std) + ]) + sample_data = [] + for image in sample_images: + img_normalized = transform(image) + img_np = np.array(img_normalized) + img_np = img_np.reshape(1, C, H, W) + assert (img_np.shape == (1, C, H, W)) + sample_data.append({'images': img_np}) + return sample_data + + +def main(): + # Load the pretrained model + torch_model = timm.create_model(MODEL_NAME, pretrained=True) + torch_model.eval() + + # Inspect the model + print("num_classes", torch_model.num_classes) + print("data_config", 
timm.data.resolve_model_data_config(torch_model)) + + # Trace the model with random data + example_input = torch.rand(1, C, H, W) + traced_model = torch.jit.trace(torch_model, example_input) + _ = traced_model(example_input) + + # Convert the traced model to CoreML + ml_model = ct.convert( + traced_model, + convert_to="mlprogram", + inputs=[ct.TensorType(name="images", shape=example_input.shape)], + outputs=[ct.TensorType(name="softmax")], + # minimum_deployment_target=ct.target.iOS18 + ) + + ml_model.short_description = MODEL_NAME + + ml_model.save("mobilenetv4_fp32.mlpackage") + print('Model converted from PyTorch to Core ML.') + + mlmodel_quantized = quantize_weights(ml_model) + mlmodel_quantized.save("mobilenetv4_w8.mlpackage") + + sample_data = load_sample_data() + mlmodel_quantized = quantize_activations(mlmodel_quantized, sample_data) + mlmodel_quantized.save("mobilenetv4_w8a8.mlpackage") + + +def quantize_weights(mlmodel): + # quantize weights to 8 bits + weight_quant_op_config = cto.OpLinearQuantizerConfig(mode="linear_symmetric", + dtype="int8") + weight_quant_model_config = cto.OptimizationConfig(weight_quant_op_config) + mlmodel_quantized = cto.linear_quantize_weights(mlmodel, + weight_quant_model_config) + print('Weights quantization finished.') + return mlmodel_quantized + + +def quantize_activations(mlmodel, sample_data): + # quantize activations to 8 bits + act_quant_op_config = OpActivationLinearQuantizerConfig(mode="linear_symmetric", + dtype="int8") + act_quant_model_config = cto.OptimizationConfig(global_config=act_quant_op_config) + mlmodel_quantized = linear_quantize_activations(mlmodel, + act_quant_model_config, + sample_data=sample_data) + print('Activations quantization finished.') + return mlmodel_quantized + + +if __name__ == "__main__": + main() From ed194f9043e41542e59c57f767415346221b3fd5 Mon Sep 17 00:00:00 2001 From: Anh Date: Thu, 11 Jul 2024 10:07:01 +0700 Subject: [PATCH 06/11] Add function to test accuracy of the converted Core ML model --- .../models/mobilenetv4_pytorch.py | 141 ++++++++++++------ 1 file changed, 94 insertions(+), 47 deletions(-) diff --git a/mobile_back_apple/models/mobilenetv4_pytorch.py b/mobile_back_apple/models/mobilenetv4_pytorch.py index 0f314a1d0..cdef6d6b2 100644 --- a/mobile_back_apple/models/mobilenetv4_pytorch.py +++ b/mobile_back_apple/models/mobilenetv4_pytorch.py @@ -17,10 +17,26 @@ C = 3 H = 384 W = 384 -MODEL_NAME = "hf-hub:timm/mobilenetv4_conv_large.e600_r384_in1k" -def load_dummy_images(count=9): - # TODO: Replace this with actual loading of images +INPUT_NAME = 'images' +OUTPUT_NAME = 'softmax' +MODEL_NAME = 'hf-hub:timm/mobilenetv4_conv_large.e600_r384_in1k' + +MLMODEL_FILE_FP32 = 'mobilenetv4_fp32.mlpackage' +MLMODEL_FILE_W8 = "mobilenetv4_w8.mlpackage" +MLMODEL_FILE_W8A8 = "mobilenetv4_w8a8.mlpackage" + +IMAGE_DIR = './imagenet' +LABELS_FILE = 'imagenet_val_full.txt' + + +def load_labels(labels_file: str) -> list[str]: + with open(labels_file, 'r') as f: + lines = f.readlines() + return lines + + +def load_dummy_images(count: int = 9) -> list[Image]: images = [] for _ in range(count): dummy_image = np.random.randint(0, 256, (H, W, C), dtype=np.uint8) @@ -28,25 +44,23 @@ def load_dummy_images(count=9): return images -def load_images_from_folder(folder, max_images=99): +def load_images_from_folder(folder: str, max_images: int = None) -> list[Image]: images = [] filenames = os.listdir(folder) - if len(filenames) > max_images: - filenames = np.random.choice(filenames, max_images, replace=False) + filenames.sort() + 
if max_images is not None and len(filenames) > max_images: + filenames = filenames[:max_images] for filename in filenames: if filename.lower().endswith((".jpg", ".jpeg", ".png")): img_path = os.path.join(folder, filename) img = Image.open(img_path).convert('RGB') - img_array = np.array(img) - images.append(img_array) + images.append(img) + print(f'Loaded: {filename}') + print(f'Loaded {len(images)} images from {folder}') return images -def load_sample_data(): - # sample_images = load_dummy_images(count=9) - folder_path = './imagenet' - sample_images = load_images_from_folder(folder_path, max_images=999) - print(f'Loaded {len(sample_images)} images from {folder_path}') +def preprocess_images(pil_images: list[Image]) -> list[dict]: # mean and std for ImageNet mean = [0.485, 0.456, 0.406] std = [0.229, 0.224, 0.225] @@ -57,17 +71,40 @@ def load_sample_data(): v2.ToDtype(torch.float32, scale=True), v2.Normalize(mean, std) ]) - sample_data = [] - for image in sample_images: - img_normalized = transform(image) - img_np = np.array(img_normalized) + transformed_images = transform(pil_images) + data = [] + for image in transformed_images: + img_np = image.numpy() img_np = img_np.reshape(1, C, H, W) assert (img_np.shape == (1, C, H, W)) - sample_data.append({'images': img_np}) - return sample_data + data.append({INPUT_NAME: img_np}) + return data -def main(): +def quantize_weights(mlmodel: ct.models.MLModel) -> ct.models.MLModel: + # quantize weights to 8 bits + weight_quant_op_config = cto.OpLinearQuantizerConfig(mode="linear_symmetric", + dtype="int8") + weight_quant_model_config = cto.OptimizationConfig(weight_quant_op_config) + mlmodel_quantized = cto.linear_quantize_weights(mlmodel, + weight_quant_model_config) + print('Weights quantization finished.') + return mlmodel_quantized + + +def quantize_activations(mlmodel: ct.models.MLModel, sample_data: list[dict]) -> ct.models.MLModel: + # quantize activations to 8 bits + act_quant_op_config = OpActivationLinearQuantizerConfig(mode="linear_symmetric", + dtype="int8") + act_quant_model_config = cto.OptimizationConfig(global_config=act_quant_op_config) + mlmodel_quantized = linear_quantize_activations(mlmodel, + act_quant_model_config, + sample_data=sample_data) + print('Activations quantization finished.') + return mlmodel_quantized + + +def convert_model(): # Load the pretrained model torch_model = timm.create_model(MODEL_NAME, pretrained=True) torch_model.eval() @@ -85,45 +122,55 @@ def main(): ml_model = ct.convert( traced_model, convert_to="mlprogram", - inputs=[ct.TensorType(name="images", shape=example_input.shape)], - outputs=[ct.TensorType(name="softmax")], + inputs=[ct.TensorType(name=INPUT_NAME, shape=example_input.shape)], + outputs=[ct.TensorType(name=OUTPUT_NAME)], # minimum_deployment_target=ct.target.iOS18 ) ml_model.short_description = MODEL_NAME - ml_model.save("mobilenetv4_fp32.mlpackage") + ml_model.save(MLMODEL_FILE_FP32) print('Model converted from PyTorch to Core ML.') mlmodel_quantized = quantize_weights(ml_model) - mlmodel_quantized.save("mobilenetv4_w8.mlpackage") + mlmodel_quantized.save(MLMODEL_FILE_W8) - sample_data = load_sample_data() + # pil_images = load_dummy_images(count=9) + pil_images = load_images_from_folder(IMAGE_DIR, max_images=999) + sample_data = preprocess_images(pil_images) mlmodel_quantized = quantize_activations(mlmodel_quantized, sample_data) - mlmodel_quantized.save("mobilenetv4_w8a8.mlpackage") - - -def quantize_weights(mlmodel): - # quantize weights to 8 bits - weight_quant_op_config = 
cto.OpLinearQuantizerConfig(mode="linear_symmetric", - dtype="int8") - weight_quant_model_config = cto.OptimizationConfig(weight_quant_op_config) - mlmodel_quantized = cto.linear_quantize_weights(mlmodel, - weight_quant_model_config) - print('Weights quantization finished.') - return mlmodel_quantized + mlmodel_quantized.save(MLMODEL_FILE_W8A8) + + +def test_accuracy(mlmodel_file: str): + expected_labels = load_labels(LABELS_FILE) + pil_images = load_images_from_folder(IMAGE_DIR) + mlmodel = ct.models.MLModel(mlmodel_file) + batch_size = 999 + correct_predictions = 0 + total_predictions = 0 + total_images = len(pil_images) + for i in range(0, len(pil_images), batch_size): + batch_images = pil_images[i:i + batch_size] + image_data = preprocess_images(batch_images) + predictions = mlmodel.predict(image_data) + assert (len(predictions) == len(image_data)) + for j in range(len(image_data)): + total_predictions += 1 + predicted_label = np.argmax(predictions[j][OUTPUT_NAME]) + expected_label = int(expected_labels[i + j]) + if predicted_label == expected_label: + correct_predictions += 1 + moving_accuracy = correct_predictions / total_predictions + print(f'Moving Accuracy: {moving_accuracy * 100:.2f}%. Images processed: {total_predictions}/{total_images}.') + assert (total_predictions == len(pil_images)) + accuracy = correct_predictions / total_predictions + print(f'Accuracy: {accuracy * 100:.2f}%. Images processed: {total_predictions}/{total_images}.') -def quantize_activations(mlmodel, sample_data): - # quantize activations to 8 bits - act_quant_op_config = OpActivationLinearQuantizerConfig(mode="linear_symmetric", - dtype="int8") - act_quant_model_config = cto.OptimizationConfig(global_config=act_quant_op_config) - mlmodel_quantized = linear_quantize_activations(mlmodel, - act_quant_model_config, - sample_data=sample_data) - print('Activations quantization finished.') - return mlmodel_quantized +def main(): + convert_model() + test_accuracy(mlmodel_file=MLMODEL_FILE_W8A8) if __name__ == "__main__": From e2b5d9af42b2e470c1a431bbd50fedd8433acb3c Mon Sep 17 00:00:00 2001 From: Anh Date: Thu, 11 Jul 2024 12:19:29 +0700 Subject: [PATCH 07/11] Add copyright info --- mobile_back_apple/models/mobilenetv4_pytorch.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mobile_back_apple/models/mobilenetv4_pytorch.py b/mobile_back_apple/models/mobilenetv4_pytorch.py index cdef6d6b2..6e591562c 100644 --- a/mobile_back_apple/models/mobilenetv4_pytorch.py +++ b/mobile_back_apple/models/mobilenetv4_pytorch.py @@ -1,3 +1,20 @@ +# !/usr/bin/env python3 +# coding: utf-8 + +# Copyright 2024 The MLPerf Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
 # Tested with torch==2.3.1, coremltools==8.0b1, timm==1.0.7, macOS 14.5, XCode 16.0 beta

 import os

From 4bb7599878164d273e436f1f1cbdad60528405af Mon Sep 17 00:00:00 2001
From: Anh
Date: Mon, 5 Aug 2024 09:16:29 +0700
Subject: [PATCH 08/11] Fix convert_nhwc_to_nchw

---
 mobile_back_apple/cpp/backend_coreml/main.cc | 70 +++++++-------------
 1 file changed, 25 insertions(+), 45 deletions(-)

diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc
index 7fe7860e3..127c31b6d 100644
--- a/mobile_back_apple/cpp/backend_coreml/main.cc
+++ b/mobile_back_apple/cpp/backend_coreml/main.cc
@@ -46,6 +46,24 @@ inline mlperf_data_t::Type MLMultiArrayDataType2MLPerfDataType(

 static bool backendExists = false;

+template <typename T>
+void convert_nhwc_to_nchw(T* data_nhwc, int N, int H, int W, int C) {
+  T* data_nchw = new T[N * C * H * W];
+  for (int n = 0; n < N; ++n) {
+    for (int c = 0; c < C; ++c) {
+      for (int h = 0; h < H; ++h) {
+        for (int w = 0; w < W; ++w) {
+          int index_nchw = ((n * C + c) * H + h) * W + w;
+          int index_nhwc = ((n * H + h) * W + w) * C + c;
+          data_nchw[index_nchw] = data_nhwc[index_nhwc];
+        }
+      }
+    }
+  }
+  std::memcpy(data_nhwc, data_nchw, N * H * W * C * sizeof(T));
+  delete[] data_nchw;
+}
+
 // Return the name of the backend
 const char *mlperf_backend_vendor_name(mlperf_backend_ptr_t backend_ptr) {
   return ((CoreMLBackendData *)backend_ptr)->vendor;
@@ -86,6 +104,7 @@ mlperf_backend_ptr_t mlperf_backend_create(
   // quick hack for checking if model expects NCHW input.
   if (strcasestr(model_path, "NCHW") != nullptr) {
     backend_data->expectNCHW = true;
+    LOG(INFO) << "Will convert inputs from NHWC to NCHW!";
   }

   // Load the model.
@@ -152,6 +171,11 @@ mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr,
                                          int32_t batch_index, int32_t i,
                                          void *data) {
   CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr;
+// if (backend_data->expectNCHW) {
+//   LOG(INFO) << "Converting inputs from NHWC to NCHW!";
+//   int N = 1, H = 384, W = 384, C = 3;
+//   convert_nhwc_to_nchw(reinterpret_cast<float*>(data), N, H, W, C);
+// }
   if ([backend_data->coreMLExecutor setInputData:data
                                               at:i
                                       batchIndex:batch_index])
@@ -190,55 +214,11 @@ mlperf_status_t mlperf_backend_get_output(mlperf_backend_ptr_t backend_ptr,
   return MLPERF_FAILURE;
 }

-void convert_nhwc_to_nchw(uint8_t *data_nhwc, int N, int H, int W, int C) {
-  uint8_t *data_nchw = new uint8_t[N * C * H * W];
-  for (int n = 0; n < N; ++n) {
-    for (int c = 0; c < C; ++c) {
-      for (int h = 0; h < H; ++h) {
-        for (int w = 0; w < W; ++w) {
-          int index_nchw = ((n * C + c) * H + h) * W + w;
-          int index_nhwc = ((n * H + h) * W + w) * C + c;
-          data_nchw[index_nchw] = data_nhwc[index_nhwc];
-        }
-      }
-    }
-  }
-  std::memcpy(data_nhwc, data_nchw, N * H * W * C * sizeof(uint8_t));
-  delete[] data_nchw;
-}
-
-void test_convert_nhwc_to_nchw() {
-  const int N = 1, H = 2, W = 2, C = 3;
-  uint8_t data_nhwc[N * H * W * C] = {
-    1, 2, 3, 4, 5, 6,
-    7, 8, 9, 10, 11, 12
-  };
-  uint8_t expected_data_nchw[N * C * H * W] = {
-    1, 4, 7, 10,
-    2, 5, 8, 11,
-    3, 6, 9, 12
-  };
-
-  convert_nhwc_to_nchw(data_nhwc, N, H, W, C);
-
-  for (int i = 0; i < N * C * H * W; ++i) {
-    if (data_nhwc[i] != expected_data_nchw[i]) {
-      std::cout << "Test failed at index " << i << ": expected "
-                << (int)expected_data_nchw[i] << ", got " << (int)data_nhwc[i]
-                << std::endl;
-      return;
-    }
-  }
-  std::cout << "Test passed!"
<< std::endl;
-}
-
 void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes,
                                    int width, int height, uint8_t *data) {
   CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr;
   if (backend_data->expectNCHW) {
-    LOG(INFO) << "Converting inputs from NHWC to NCHW!";
     int N = 1, H = height, W = width, C = 3;
-    convert_nhwc_to_nchw(data, N, H, W, C);
-    // test_convert_nhwc_to_nchw();
+    convert_nhwc_to_nchw(reinterpret_cast<float*>(data), N, H, W, C);
   }
 }

From a571497cde6863d6102fefd58bc560474969847e Mon Sep 17 00:00:00 2001
From: Anh
Date: Mon, 5 Aug 2024 09:32:41 +0700
Subject: [PATCH 09/11] Update coreml_settings.pbtxt

---
 .../cpp/backend_coreml/coreml_settings.pbtxt | 24 +++++++++----------
 mobile_back_apple/dev-utils/Makefile         |  2 +-
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
index 758d675cc..1e2bea519 100644
--- a/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
+++ b/mobile_back_apple/cpp/backend_coreml/coreml_settings.pbtxt
@@ -65,22 +65,22 @@ benchmark_setting {
     delegate_name: "CPU & GPU & ANE"
     accelerator_name: "cpu&gpu&ane"
     accelerator_desc: "All compute units"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & GPU"
     accelerator_name: "cpu&gpu"
     accelerator_desc: "CPU and GPU"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & ANE"
     accelerator_name: "cpu&ane"
     accelerator_desc: "CPU and Neural Engine"
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_selected: "CPU & GPU & ANE"
 }
@@ -93,24 +93,24 @@ benchmark_setting {
     accelerator_name: "cpu&gpu&ane"
     accelerator_desc: "All compute units"
     batch_size: 32
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & GPU"
     accelerator_name: "cpu&gpu"
     accelerator_desc: "CPU and GPU"
     batch_size: 32
-    model_path: "https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel"
-    model_checksum: "fbfba70596f74010852f22fa04721202"
+    model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip"
+    model_checksum: "164c504eb3e9af6c730c1765b8b81b32"
   }
   delegate_choice: {
     delegate_name: "CPU & ANE"
     accelerator_name: "cpu&ane"
     accelerator_desc: "CPU and Neural Engine"
     batch_size: 32
-    model_path: 
"https://github.com/mlcommons/mobile_models/releases/download/v4.0-coreml/MobilenetV4_Large.mlmodel" - model_checksum: "fbfba70596f74010852f22fa04721202" + model_path: "https://github.com/anhappdev/tmp/releases/download/1/mobilenetv4_fp32_NCHW.mlpackage.zip" + model_checksum: "164c504eb3e9af6c730c1765b8b81b32" } delegate_selected: "CPU & GPU & ANE" } diff --git a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile index a20f9ac32..3f64e8dae 100644 --- a/mobile_back_apple/dev-utils/Makefile +++ b/mobile_back_apple/dev-utils/Makefile @@ -161,7 +161,7 @@ coreml-run-ic-v2: bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification \ --mode=PerformanceOnly \ --output_dir="${REPO_ROOT_DIR}/output" \ - --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/mobilenetv4_NCHW.mlpackage" \ + --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/mobilenetv4_fp32_NCHW.mlpackage" \ --image_width=384 \ --image_height=384 \ --lib_path="bazel-bin/mobile_back_apple/cpp/backend_coreml/libcoremlbackend.so" \ From 041010e401dcd698960e6a920fe20fc6101e8c34 Mon Sep 17 00:00:00 2001 From: Anh Date: Mon, 5 Aug 2024 09:45:40 +0700 Subject: [PATCH 10/11] Format files --- mobile_back_apple/.gitignore | 1 + mobile_back_apple/cpp/backend_coreml/main.cc | 11 +++-------- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/mobile_back_apple/.gitignore b/mobile_back_apple/.gitignore index c7058d2ef..0fbca8bfc 100644 --- a/mobile_back_apple/.gitignore +++ b/mobile_back_apple/.gitignore @@ -1,4 +1,5 @@ dev-resources +models/*.mlpackage # Created by https://www.toptal.com/developers/gitignore/api/xcode # Edit at https://www.toptal.com/developers/gitignore?templates=xcode diff --git a/mobile_back_apple/cpp/backend_coreml/main.cc b/mobile_back_apple/cpp/backend_coreml/main.cc index 127c31b6d..a2246345f 100644 --- a/mobile_back_apple/cpp/backend_coreml/main.cc +++ b/mobile_back_apple/cpp/backend_coreml/main.cc @@ -47,8 +47,8 @@ inline mlperf_data_t::Type MLMultiArrayDataType2MLPerfDataType( static bool backendExists = false; template -void convert_nhwc_to_nchw(T* data_nhwc, int N, int H, int W, int C) { - T* data_nchw = new T[N * C * H * W]; +void convert_nhwc_to_nchw(T *data_nhwc, int N, int H, int W, int C) { + T *data_nchw = new T[N * C * H * W]; for (int n = 0; n < N; ++n) { for (int c = 0; c < C; ++c) { for (int h = 0; h < H; ++h) { @@ -171,11 +171,6 @@ mlperf_status_t mlperf_backend_set_input(mlperf_backend_ptr_t backend_ptr, int32_t batch_index, int32_t i, void *data) { CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr; -// if (backend_data->expectNCHW) { -// LOG(INFO) << "Converting inputs from NHWC to NCHW!"; -// int N = 1, H = 384, W = 384, C = 3; -// convert_nhwc_to_nchw(reinterpret_cast(data), N, H, W, C); -// } if ([backend_data->coreMLExecutor setInputData:data at:i batchIndex:batch_index]) @@ -219,6 +214,6 @@ void mlperf_backend_convert_inputs(mlperf_backend_ptr_t backend_ptr, int bytes, CoreMLBackendData *backend_data = (CoreMLBackendData *)backend_ptr; if (backend_data->expectNCHW) { int N = 1, H = height, W = width, C = 3; - convert_nhwc_to_nchw(reinterpret_cast(data), N, H, W, C); + convert_nhwc_to_nchw(reinterpret_cast(data), N, H, W, C); } } From 21cf7a0cde36cd619862a3ba18abe7387aa7e8e3 Mon Sep 17 00:00:00 2001 From: Anh Date: Mon, 5 Aug 2024 10:05:01 +0700 Subject: [PATCH 11/11] Update Makefile --- mobile_back_apple/dev-utils/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/mobile_back_apple/dev-utils/Makefile b/mobile_back_apple/dev-utils/Makefile index 3f64e8dae..6bdcb31c3 100644 --- a/mobile_back_apple/dev-utils/Makefile +++ b/mobile_back_apple/dev-utils/Makefile @@ -12,7 +12,7 @@ export ANDROID_NDK_VERSION=25 export ANDROID_NDK_API_LEVEL=33 tflite: tflite-build tflite-run-ic-v2 tflite-run-ic-offline-v2 -coreml: coreml-build coreml-run-ic-v2 +coreml: coreml-build coreml-run-ic-v2 coreml-run-ic-offline-v2 tflite-all: tflite-build tflite-run-ic tflite-run-ic-offline tflite-run-od tflite-run-is tflite-run-lu tflite-run-sr tflite-run-ic-v2 tflite-run-ic-offline-v2 coreml-all: coreml-build coreml-run-ic coreml-run-ic-offline coreml-run-od coreml-run-is coreml-run-lu coreml-run-sr coreml-run-ic-v2 coreml-run-ic-offline-v2 @@ -158,7 +158,7 @@ coreml-run-ic-offline: coreml-run-ic-v2: cd ${REPO_ROOT_DIR} && \ - bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification \ + bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_v2 \ --mode=PerformanceOnly \ --output_dir="${REPO_ROOT_DIR}/output" \ --model_file="${REPO_ROOT_DIR}/mobile_back_apple/dev-resources/mobilenet_v4/mobilenetv4_fp32_NCHW.mlpackage" \ @@ -171,7 +171,7 @@ coreml-run-ic-v2: coreml-run-ic-offline-v2: cd ${REPO_ROOT_DIR} && \ - bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_offline \ + bazel-bin/flutter/cpp/binary/main EXTERNAL image_classification_offline_v2 \ --mode=PerformanceOnly \ --scenario=Offline \ --batch_size=32 \