From 778bc853978e2ac72c0f20dca9e8364b6f5c8a44 Mon Sep 17 00:00:00 2001 From: nailixing Date: Mon, 1 Jun 2020 21:27:36 +0800 Subject: [PATCH] Add darknet code dependences, base detection modele and make models compatiable with singa-auto --- .../models/image_object_detection/food101.py | 28 +++ .../models/image_object_detection/food172.py | 27 +++ .../models/image_object_detection/food231.py | 27 +++ .../models/image_object_detection/food256.py | 27 +++ .../models/image_object_detection/food_55.py | 27 +++ .../food_darknet_xception.py | 148 ------------ .../food_darknet_xception1.py | 148 ------------ .../food_darknet_xception2.py | 148 ------------ singa_auto/darknet/__init__.py | 0 singa_auto/darknet/darknet.py | 159 +++++++++++++ .../darknet/food_objection_base_model.py | 224 ++++++++++++++++++ 11 files changed, 519 insertions(+), 444 deletions(-) create mode 100644 examples/models/image_object_detection/food101.py create mode 100644 examples/models/image_object_detection/food172.py create mode 100644 examples/models/image_object_detection/food231.py create mode 100644 examples/models/image_object_detection/food256.py create mode 100644 examples/models/image_object_detection/food_55.py delete mode 100644 examples/models/image_object_detection/food_darknet_xception.py delete mode 100644 examples/models/image_object_detection/food_darknet_xception1.py delete mode 100644 examples/models/image_object_detection/food_darknet_xception2.py create mode 100644 singa_auto/darknet/__init__.py create mode 100644 singa_auto/darknet/darknet.py create mode 100644 singa_auto/darknet/food_objection_base_model.py diff --git a/examples/models/image_object_detection/food101.py b/examples/models/image_object_detection/food101.py new file mode 100644 index 00000000..39256e09 --- /dev/null +++ b/examples/models/image_object_detection/food101.py @@ -0,0 +1,28 @@ +from singa_auto.darknet.food_objection_base_model import FoodDetectionBase +from keras.applications.xception import Xception + + +class FoodDetection101(FoodDetectionBase): + + def __init__(self, **knobs): + + super().__init__(clf_model_class_name=Xception, **knobs) + + # pre config + self.classes = 101 + self.image_size = 299 + + # preload files + self.yolo_cfg_name = "yolov3-food.cfg" + self.yolo_weight_name = "yolov3-food_final.weights" + self.food_name = "food.names" + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # if download at server side, leave it to none + self.preload_clf_model_weights_name = None + + # this is the trained model + self.trained_clf_model_weights_name = "xception-F101-0.85.h5" + + self._npy_index_name = "food101.npy" diff --git a/examples/models/image_object_detection/food172.py b/examples/models/image_object_detection/food172.py new file mode 100644 index 00000000..8a7341bc --- /dev/null +++ b/examples/models/image_object_detection/food172.py @@ -0,0 +1,27 @@ +from keras.applications.xception import Xception +from singa_auto.darknet.food_objection_base_model import FoodDetectionBase + + +class FoodDetection172(FoodDetectionBase): + + def __init__(self, **knobs): + super().__init__(clf_model_class_name=Xception, **knobs) + + # pre config + self.classes = 172 + self.image_size = 299 + + # preload files + self.yolo_cfg_name = "yolov3-food.cfg" + self.yolo_weight_name = "yolov3-food_final.weights" + self.food_name = "food.names" + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # 
if download at server side, leave it to none + self.preload_clf_model_weights_name = None + + # this is the trained model + self.trained_clf_model_weights_name = "xception-800_F172-0.86.h5" + + self._npy_index_name = "food172.npy" diff --git a/examples/models/image_object_detection/food231.py b/examples/models/image_object_detection/food231.py new file mode 100644 index 00000000..86324f2d --- /dev/null +++ b/examples/models/image_object_detection/food231.py @@ -0,0 +1,27 @@ +from keras.applications.xception import Xception +from singa_auto.darknet.food_objection_base_model import FoodDetectionBase + + +class FoodDetection231(FoodDetectionBase): + + def __init__(self, **knobs): + super().__init__(clf_model_class_name=Xception, **knobs) + + # pre config + self.classes = 231 + self.image_size = 299 + + # preload files + self.yolo_cfg_name = "yolov3-food.cfg" + self.yolo_weight_name = "yolov3-food_final.weights" + self.food_name = "food.names" + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # if download at server side, leave it to none + self.preload_clf_model_weights_name = None + + # this is the trained model + self.trained_clf_model_weights_name = "xception-food231-0-15-0.82.h5" + + self._npy_index_name = "food231.npy" diff --git a/examples/models/image_object_detection/food256.py b/examples/models/image_object_detection/food256.py new file mode 100644 index 00000000..1bda2fa2 --- /dev/null +++ b/examples/models/image_object_detection/food256.py @@ -0,0 +1,27 @@ +from keras.applications.inception_resnet_v2 import InceptionResNetV2 +from singa_auto.darknet.food_objection_base_model import FoodDetectionBase + + +class FoodDetection256(FoodDetectionBase): + + def __init__(self, **knobs): + super().__init__(clf_model_class_name=InceptionResNetV2, **knobs) + + # pre config + self.classes = 256 + self.image_size = 299 + + # preload files + self.yolo_cfg_name = "yolov3-food.cfg" + self.yolo_weight_name = "yolov3-food_final.weights" + self.food_name = "food.names" + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # if download at server side, leave it to none + self.preload_clf_model_weights_name = "inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5" + + # this is the trained model + self.trained_clf_model_weights_name = "inceptionresnet-U256-0.73.h5" + + self._npy_index_name = "uec256.npy" diff --git a/examples/models/image_object_detection/food_55.py b/examples/models/image_object_detection/food_55.py new file mode 100644 index 00000000..ff0e580a --- /dev/null +++ b/examples/models/image_object_detection/food_55.py @@ -0,0 +1,27 @@ +from keras.applications.inception_resnet_v2 import InceptionResNetV2 +from singa_auto.darknet.food_objection_base_model import FoodDetectionBase + + +class FoodDetection55(FoodDetectionBase): + + def __init__(self, **knobs): + super().__init__(clf_model_class_name=InceptionResNetV2, **knobs) + + # pre config + self.classes = 55 + self.image_size = 299 + + # preload files + self.yolo_cfg_name = "yolov3-food.cfg" + self.yolo_weight_name = "yolov3-food_final.weights" + self.food_name = "food.names" + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # if download at server side, leave it to none + self.preload_clf_model_weights_name = "inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5" + + # this is the trained model + 
self.trained_clf_model_weights_name = "inceptionresnet-FC55-0.86.h5" + + self._npy_index_name = "food55.npy" diff --git a/examples/models/image_object_detection/food_darknet_xception.py b/examples/models/image_object_detection/food_darknet_xception.py deleted file mode 100644 index a8dbb17e..00000000 --- a/examples/models/image_object_detection/food_darknet_xception.py +++ /dev/null @@ -1,148 +0,0 @@ -import base64 -import io -import os -import tempfile -import zipfile -from typing import List - -import numpy as np -from PIL import Image - -from keras.models import Model -from keras.layers import Dense -from keras.preprocessing.image import img_to_array -from keras.applications.xception import Xception -from singa_auto.custom_models_base.darknet import darknet -from singa_auto.model import BaseModel - - -class FoodDetection(BaseModel): - ''' - Implements Xception on Keras for IMAGE_CLASSIFICATION - ''' - - def __init__(self, **knobs): - super().__init__(**knobs) - self.xception_model = None - self.det_net = None - self.det_meta = None - self.classes = 231 - self.image_size = 299 - self.class_dict = {} - self.npy_index = "singa_auto/custom_models_base/darknet/cfg/food/food231.npy" - - def train(self, dataset_path, **kwargs): - pass - - def get_knob_config(self): - pass - - def evaluate(self, dataset_path): - pass - - def destroy(self): - pass - - def dump_parameters(self): - pass - - def predict(self, queries): - res = [] - queries = [self.image_to_byte_array(ele) for ele in queries] - - for img_bytes in queries: - with tempfile.NamedTemporaryFile() as tmp: - with open(tmp.name, 'wb') as f: - f.write(img_bytes) - img_path = tmp.name - img = Image.open(img_path) - width, height = img.size[0], img.size[1] - predications = self._recognition(img_path) - - result = dict() - result['status'] = "ok" - result['predictions'] = [] - - for index, box in enumerate(predications): - prob = box[1] - x, y, w, h = box[2][0], box[2][1], box[2][2], box[2][3] - left = x - w / 2 - upper = y - h / 2 - right = x + w / 2 - down = y + h / 2 - cropped = img.crop((x - w / 2, y - h / 2, x + w / 2, y + h / 2)) # (left, upper, right, lower) - y = self._classify(cropped) - - class_id = np.argsort(y[0])[::-1][0] - str_class = self.class_dict[class_id] - jbox = dict() - jbox['label_id'] = str(class_id) - jbox['label'] = str(str_class) - jbox['probability'] = prob - # y_min,x_min,y_max,x_max - - jbox['detection_box'] = [max(0, upper / height), max(0, left / width), - min(1, down / height), min(1, right / width)] - - result['predictions'].append(jbox) - - res.append(result) - return res - - def load_parameters(self, params): - - self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()} - - zip_file_base64 = params['zip_file_base64'] - - self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size) - - with tempfile.NamedTemporaryFile() as tmp: - # Convert back to bytes & write to temp file - zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8')) - with open(tmp.name, 'wb') as f: - f.write(zip_file_base64) - with tempfile.TemporaryDirectory() as d: - dataset_zipfile = zipfile.ZipFile(tmp.name, 'r') - dataset_zipfile.extractall(path=d) - dataset_zipfile.close() - - for file_name in os.listdir(d): - if "yolo" in file_name: - self.det_net = darknet.load_net(b"./singa_auto/custom_models_base/darknet/cfg/food/yolov3-food.cfg", - os.path.join(d, file_name).encode(), 0) - - self.det_meta = darknet.load_meta(b"./singa_auto/custom_models_base/darknet/cfg/food/food.data") - - if 
"xception" in file_name: - self.xception_model.load_weights(os.path.join(d, file_name)) - - def _build_model(self, classes, image_size): - base_model = Xception(include_top=True, input_shape=(image_size, image_size, 3)) - base_model.layers.pop() - predictions = Dense(classes, activation='softmax')(base_model.layers[-1].output) - clf_model = Model(input=base_model.input, output=[predictions]) - return clf_model - - def _recognition(self, img_path): - res = darknet.detect(self.det_net, self.det_meta, str.encode(img_path)) - return res - - def _classify(self, img): - width_height_tuple = (self.image_size, self.image_size) - if (img.size != width_height_tuple): - img = img.resize(width_height_tuple, Image.NEAREST) - x = img_to_array(img) - x /= 255 * 1. - x = x.reshape((1,) + x.shape) - y = self.xception_model.predict(x) - return y - - @staticmethod - def image_to_byte_array(query: List[str]): - query = np.asarray(query).astype(np.uint8) - image = Image.fromarray(query) - imgByteArr = io.BytesIO() - image.save(imgByteArr, format='JPEG') - imgByteArr = imgByteArr.getvalue() - return imgByteArr diff --git a/examples/models/image_object_detection/food_darknet_xception1.py b/examples/models/image_object_detection/food_darknet_xception1.py deleted file mode 100644 index 638e8a40..00000000 --- a/examples/models/image_object_detection/food_darknet_xception1.py +++ /dev/null @@ -1,148 +0,0 @@ -import base64 -import io -import os -import tempfile -import zipfile -from typing import List - -import numpy as np -from PIL import Image - -from keras.models import Model -from keras.layers import Dense -from keras.preprocessing.image import img_to_array -from keras.applications.xception import Xception -from singa_auto.custom_models_base.darknet import darknet -from singa_auto.model import BaseModel - - -class FoodDetection(BaseModel): - ''' - Implements Xception on Keras for IMAGE_CLASSIFICATION - ''' - - def __init__(self, **knobs): - super().__init__(**knobs) - self.xception_model = None - self.det_net = None - self.det_meta = None - self.classes = 44 - self.image_size = 299 - self.class_dict = {} - self.npy_index = "singa_auto/custom_models_base/darknet/cfg/food/food44.npy" - - def train(self, dataset_path, **kwargs): - pass - - def get_knob_config(self): - pass - - def evaluate(self, dataset_path): - pass - - def destroy(self): - pass - - def dump_parameters(self): - pass - - def predict(self, queries): - res = [] - queries = [self.image_to_byte_array(ele) for ele in queries] - - for img_bytes in queries: - with tempfile.NamedTemporaryFile() as tmp: - with open(tmp.name, 'wb') as f: - f.write(img_bytes) - img_path = tmp.name - img = Image.open(img_path) - width, height = img.size[0], img.size[1] - predications = self._recognition(img_path) - - result = dict() - result['status'] = "ok" - result['predictions'] = [] - - for index, box in enumerate(predications): - prob = box[1] - x, y, w, h = box[2][0], box[2][1], box[2][2], box[2][3] - left = x - w / 2 - upper = y - h / 2 - right = x + w / 2 - down = y + h / 2 - cropped = img.crop((x - w / 2, y - h / 2, x + w / 2, y + h / 2)) # (left, upper, right, lower) - y = self._classify(cropped) - - class_id = np.argsort(y[0])[::-1][0] - str_class = self.class_dict[class_id] - jbox = dict() - jbox['label_id'] = str(class_id) - jbox['label'] = str(str_class) - jbox['probability'] = prob - # y_min,x_min,y_max,x_max - - jbox['detection_box'] = [max(0, upper / height), max(0, left / width), - min(1, down / height), min(1, right / width)] - - 
result['predictions'].append(jbox) - - res.append(result) - return res - - def load_parameters(self, params): - - self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()} - - zip_file_base64 = params['zip_file_base64'] - - self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size) - - with tempfile.NamedTemporaryFile() as tmp: - # Convert back to bytes & write to temp file - zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8')) - with open(tmp.name, 'wb') as f: - f.write(zip_file_base64) - with tempfile.TemporaryDirectory() as d: - dataset_zipfile = zipfile.ZipFile(tmp.name, 'r') - dataset_zipfile.extractall(path=d) - dataset_zipfile.close() - - for file_name in os.listdir(d): - if "yolo" in file_name: - self.det_net = darknet.load_net(b"./singa_auto/custom_models_base/darknet/cfg/food/yolov3-food.cfg", - os.path.join(d, file_name).encode(), 0) - - self.det_meta = darknet.load_meta(b"./singa_auto/custom_models_base/darknet/cfg/food/food.data") - - if "xception" in file_name: - self.xception_model.load_weights(os.path.join(d, file_name)) - - def _build_model(self, classes, image_size): - base_model = Xception(include_top=True, input_shape=(image_size, image_size, 3)) - base_model.layers.pop() - predictions = Dense(classes, activation='softmax')(base_model.layers[-1].output) - clf_model = Model(input=base_model.input, output=[predictions]) - return clf_model - - def _recognition(self, img_path): - res = darknet.detect(self.det_net, self.det_meta, str.encode(img_path)) - return res - - def _classify(self, img): - width_height_tuple = (self.image_size, self.image_size) - if (img.size != width_height_tuple): - img = img.resize(width_height_tuple, Image.NEAREST) - x = img_to_array(img) - x /= 255 * 1. 
- x = x.reshape((1,) + x.shape) - y = self.xception_model.predict(x) - return y - - @staticmethod - def image_to_byte_array(query: List[str]): - query = np.asarray(query).astype(np.uint8) - image = Image.fromarray(query) - imgByteArr = io.BytesIO() - image.save(imgByteArr, format='JPEG') - imgByteArr = imgByteArr.getvalue() - return imgByteArr diff --git a/examples/models/image_object_detection/food_darknet_xception2.py b/examples/models/image_object_detection/food_darknet_xception2.py deleted file mode 100644 index 9faa0c96..00000000 --- a/examples/models/image_object_detection/food_darknet_xception2.py +++ /dev/null @@ -1,148 +0,0 @@ -import base64 -import io -import os -import tempfile -import zipfile -from typing import List - -import numpy as np -from PIL import Image - -from keras.models import Model -from keras.layers import Dense -from keras.preprocessing.image import img_to_array -from keras.applications.xception import Xception -from singa_auto.custom_models_base.darknet import darknet -from singa_auto.model import BaseModel - - -class FoodDetection(BaseModel): - ''' - Implements Xception on Keras for IMAGE_CLASSIFICATION - ''' - - def __init__(self, **knobs): - super().__init__(**knobs) - self.xception_model = None - self.det_net = None - self.det_meta = None - self.classes = 101 - self.image_size = 299 - self.class_dict = {} - self.npy_index = "singa_auto/custom_models_base/darknet/cfg/food/food101.npy" - - def train(self, dataset_path, **kwargs): - pass - - def get_knob_config(self): - pass - - def evaluate(self, dataset_path): - pass - - def destroy(self): - pass - - def dump_parameters(self): - pass - - def predict(self, queries): - res = [] - queries = [self.image_to_byte_array(ele) for ele in queries] - - for img_bytes in queries: - with tempfile.NamedTemporaryFile() as tmp: - with open(tmp.name, 'wb') as f: - f.write(img_bytes) - img_path = tmp.name - img = Image.open(img_path) - width, height = img.size[0], img.size[1] - predications = self._recognition(img_path) - - result = dict() - result['status'] = "ok" - result['predictions'] = [] - - for index, box in enumerate(predications): - prob = box[1] - x, y, w, h = box[2][0], box[2][1], box[2][2], box[2][3] - left = x - w / 2 - upper = y - h / 2 - right = x + w / 2 - down = y + h / 2 - cropped = img.crop((x - w / 2, y - h / 2, x + w / 2, y + h / 2)) # (left, upper, right, lower) - y = self._classify(cropped) - - class_id = np.argsort(y[0])[::-1][0] - str_class = self.class_dict[class_id] - jbox = dict() - jbox['label_id'] = str(class_id) - jbox['label'] = str(str_class) - jbox['probability'] = prob - # y_min,x_min,y_max,x_max - - jbox['detection_box'] = [max(0, upper / height), max(0, left / width), - min(1, down / height), min(1, right / width)] - - result['predictions'].append(jbox) - - res.append(result) - return res - - def load_parameters(self, params): - - self.class_dict = {v: k for k, v in np.load(self.npy_index)[()].items()} - - zip_file_base64 = params['zip_file_base64'] - - self.xception_model = self._build_model(classes=self.classes, image_size=self.image_size) - - with tempfile.NamedTemporaryFile() as tmp: - # Convert back to bytes & write to temp file - zip_file_base64 = base64.b64decode(zip_file_base64.encode('utf-8')) - with open(tmp.name, 'wb') as f: - f.write(zip_file_base64) - with tempfile.TemporaryDirectory() as d: - dataset_zipfile = zipfile.ZipFile(tmp.name, 'r') - dataset_zipfile.extractall(path=d) - dataset_zipfile.close() - - for file_name in os.listdir(d): - if "yolo" in file_name: - self.det_net 
= darknet.load_net(b"./singa_auto/custom_models_base/darknet/cfg/food/yolov3-food.cfg", - os.path.join(d, file_name).encode(), 0) - - self.det_meta = darknet.load_meta(b"./singa_auto/custom_models_base/darknet/cfg/food/food.data") - - if "xception" in file_name: - self.xception_model.load_weights(os.path.join(d, file_name)) - - def _build_model(self, classes, image_size): - base_model = Xception(include_top=True, input_shape=(image_size, image_size, 3)) - base_model.layers.pop() - predictions = Dense(classes, activation='softmax')(base_model.layers[-1].output) - clf_model = Model(input=base_model.input, output=[predictions]) - return clf_model - - def _recognition(self, img_path): - res = darknet.detect(self.det_net, self.det_meta, str.encode(img_path)) - return res - - def _classify(self, img): - width_height_tuple = (self.image_size, self.image_size) - if (img.size != width_height_tuple): - img = img.resize(width_height_tuple, Image.NEAREST) - x = img_to_array(img) - x /= 255 * 1. - x = x.reshape((1,) + x.shape) - y = self.xception_model.predict(x) - return y - - @staticmethod - def image_to_byte_array(query: List[str]): - query = np.asarray(query).astype(np.uint8) - image = Image.fromarray(query) - imgByteArr = io.BytesIO() - image.save(imgByteArr, format='JPEG') - imgByteArr = imgByteArr.getvalue() - return imgByteArr diff --git a/singa_auto/darknet/__init__.py b/singa_auto/darknet/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/singa_auto/darknet/darknet.py b/singa_auto/darknet/darknet.py new file mode 100644 index 00000000..02b34cd2 --- /dev/null +++ b/singa_auto/darknet/darknet.py @@ -0,0 +1,159 @@ +from ctypes import * +import math +import random +from PIL import Image + + +def sample(probs): + s = sum(probs) + probs = [a / s for a in probs] + r = random.uniform(0, 1) + for i in range(len(probs)): + r = r - probs[i] + if r <= 0: + return i + return len(probs) - 1 + + +def c_array(ctype, values): + arr = (ctype * len(values))() + arr[:] = values + return arr + + +class BOX(Structure): + _fields_ = [("x", c_float), + ("y", c_float), + ("w", c_float), + ("h", c_float)] + + +class DETECTION(Structure): + _fields_ = [("bbox", BOX), + ("classes", c_int), + ("prob", POINTER(c_float)), + ("mask", POINTER(c_float)), + ("objectness", c_float), + ("sort_class", c_int)] + + +class IMAGE(Structure): + _fields_ = [("w", c_int), + ("h", c_int), + ("c", c_int), + ("data", POINTER(c_float))] + + +class METADATA(Structure): + _fields_ = [("classes", c_int), + ("names", POINTER(c_char_p))] + +import os +print(os.getcwd()) +lib = CDLL("./singa_auto/darknet/libdarknet_docker_cpu.so", RTLD_GLOBAL) +lib.network_width.argtypes = [c_void_p] +lib.network_width.restype = c_int +lib.network_height.argtypes = [c_void_p] +lib.network_height.restype = c_int + +predict = lib.network_predict +predict.argtypes = [c_void_p, POINTER(c_float)] +predict.restype = POINTER(c_float) + +set_gpu = lib.cuda_set_device +set_gpu.argtypes = [c_int] + +make_image = lib.make_image +make_image.argtypes = [c_int, c_int, c_int] +make_image.restype = IMAGE + +get_network_boxes = lib.get_network_boxes +get_network_boxes.argtypes = [c_void_p, c_int, c_int, c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] +get_network_boxes.restype = POINTER(DETECTION) + +make_network_boxes = lib.make_network_boxes +make_network_boxes.argtypes = [c_void_p] +make_network_boxes.restype = POINTER(DETECTION) + +free_detections = lib.free_detections +free_detections.argtypes = [POINTER(DETECTION), c_int] + +free_ptrs = 
lib.free_ptrs +free_ptrs.argtypes = [POINTER(c_void_p), c_int] + +network_predict = lib.network_predict +network_predict.argtypes = [c_void_p, POINTER(c_float)] + +reset_rnn = lib.reset_rnn +reset_rnn.argtypes = [c_void_p] + +load_net = lib.load_network +load_net.argtypes = [c_char_p, c_char_p, c_int] +load_net.restype = c_void_p + +do_nms_obj = lib.do_nms_obj +do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +do_nms_sort = lib.do_nms_sort +do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] + +free_image = lib.free_image +free_image.argtypes = [IMAGE] + +letterbox_image = lib.letterbox_image +letterbox_image.argtypes = [IMAGE, c_int, c_int] +letterbox_image.restype = IMAGE + +load_meta = lib.get_metadata +lib.get_metadata.argtypes = [c_char_p] +lib.get_metadata.restype = METADATA + +load_image = lib.load_image_color +load_image.argtypes = [c_char_p, c_int, c_int] +load_image.restype = IMAGE + +rgbgr_image = lib.rgbgr_image +rgbgr_image.argtypes = [IMAGE] + +predict_image = lib.network_predict_image +predict_image.argtypes = [c_void_p, IMAGE] +predict_image.restype = POINTER(c_float) + + +def classify(net, meta, im): + out = predict_image(net, im) + res = [] + for i in range(meta.classes): + res.append((meta.names[i], out[i])) + res = sorted(res, key=lambda x: -x[1]) + return res + + +def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): + im = load_image(image, 0, 0) + num = c_int(0) + pnum = pointer(num) + predict_image(net, im) + dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, None, 0, pnum) + num = pnum[0] + if (nms): do_nms_obj(dets, num, meta.classes, nms); + + res = [] + for j in range(num): + for i in range(meta.classes): + if dets[j].prob[i] > 0: + b = dets[j].bbox + res.append((meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) + res = sorted(res, key=lambda x: -x[1]) + free_image(im) + free_detections(dets, num) + return res + + +if __name__ == "__main__": + net = load_net(b"cfg/food/yolov3-food.cfg", b"/Users//Downloads/yolov3-food_final.weights", 0) + meta = load_meta(b"cfg/food/food.data") + r = detect(net, meta, b"/Users//data/object_detection/000002.jpg") + print(r) + + diff --git a/singa_auto/darknet/food_objection_base_model.py b/singa_auto/darknet/food_objection_base_model.py new file mode 100644 index 00000000..8af86aae --- /dev/null +++ b/singa_auto/darknet/food_objection_base_model.py @@ -0,0 +1,224 @@ +import abc +import base64 +import io +import os +import tempfile +import zipfile +from typing import List + +import numpy as np +from PIL import Image + +from keras.models import Model +from keras.layers import Dense +from keras.preprocessing.image import img_to_array + +from singa_auto.darknet import darknet +from singa_auto.model import BaseModel + + +class FoodDetectionBase(BaseModel): + + def __init__(self, clf_model_class_name, **knobs): + super().__init__(**knobs) + # model + # this is the model class we use + self.clf_model_class_name = clf_model_class_name + + # this is the model after we build + self.clf_model = None + + # this is the darknet model + self.det_net = None + self.det_meta = None + + # labels + self.class_dict = {} + + # pre config + self.classes = None + self.image_size = None + + # preload files + self.yolo_cfg_name = None + self.yolo_weight_name = None + self.food_name = None + + # this is the model file downloaded from internet, + # can choose download locally and upload , or download from server + # if download at server side, leave it to none + 
self.preload_clf_model_weights_name = None + + # this is the trained model + self.trained_clf_model_weights_name = None + + self._npy_index_name = None + + def train(self, dataset_path, **kwargs): + pass + + def get_knob_config(self): + pass + + def evaluate(self, dataset_path): + pass + + def destroy(self): + pass + + def dump_parameters(self): + pass + + def predict(self, queries): + print("Get queries") + + res = [] + queries = [self.image_to_byte_array(ele) for ele in queries] + + for img_bytes in queries: + with tempfile.NamedTemporaryFile() as tmp: + with open(tmp.name, 'wb') as f: + f.write(img_bytes) + img_path = tmp.name + img = Image.open(img_path) + width, height = img.size[0], img.size[1] + predications = self._detection(img_path) + + result = dict() + result['status'] = "ok" + result['predictions'] = [] + print("Detection is done, begin to do the classification") + for index, box in enumerate(predications): + prob = box[1] + x, y, w, h = box[2][0], box[2][1], box[2][2], box[2][3] + left = x - w / 2 + upper = y - h / 2 + right = x + w / 2 + down = y + h / 2 + # (left, upper, right, lower) + cropped = img.crop((x - w / 2, y - h / 2, x + w / 2, y + h / 2)) + y = self._classification(cropped) + + class_id = np.argsort(y[0])[::-1][0] + str_class = self.class_dict[class_id] + jbox = dict() + jbox['label_id'] = str(class_id) + jbox['label'] = str(str_class) + jbox['probability'] = prob + + jbox['detection_box'] = [max(0, upper / height), max(0, left / width), + min(1, down / height), min(1, right / width)] + + result['predictions'].append(jbox) + + res.append(result) + return res + + def load_parameters(self, params): + + # get the zip file bytes + zip_file_base64 = params['zip_file_base64'] + + with tempfile.NamedTemporaryFile() as tmp: + + # Convert back to bytes & write to temp file + zip_file_base64_bytes = base64.b64decode(zip_file_base64.encode('utf-8')) + + # write the bytes to local file + with open(tmp.name, 'wb') as f: + f.write(zip_file_base64_bytes) + + # extract the zip file + with tempfile.TemporaryDirectory() as root_path: + dataset_zipfile = zipfile.ZipFile(tmp.name, 'r') + dataset_zipfile.extractall(path=root_path) + dataset_zipfile.close() + + print("Begin to load model dependences") + + # generate yolo dependence pathes + yolo_cfg_path = os.path.join(root_path, self.yolo_cfg_name) + yolo_weight_path = os.path.join(root_path, self.yolo_weight_name) + food_name = os.path.join(root_path, self.food_name) + + # generate a food.data file, which is used by darknet + food_data_path = os.path.join(root_path, "food.data") + + with open(food_data_path, 'wb') as f: + f.write( + "classes= 1\ntrain = '""'\nvalid = '""'\nnames = {}" + .format(food_name) + .encode() + ) + + if self.preload_clf_model_weights_name: + preload_clf_model_weight_path = os.path.join(root_path, self.preload_clf_model_weights_name) + else: + preload_clf_model_weight_path = None + + trained_clf_model_weights_path = os.path.join(root_path, self.trained_clf_model_weights_name) + + npy_index_path = os.path.join(root_path, self._npy_index_name) + + # load model files for darknet + self.det_net = darknet.load_net(yolo_cfg_path.encode(), + yolo_weight_path.encode(), + 0) + + self.det_meta = darknet.load_meta(food_data_path.encode()) + + print("Begin to build models") + + # load pre-trained model for classification model + self.clf_model = self._build_model( + weight_path=preload_clf_model_weight_path, + classes=self.classes, + image_size=self.image_size) + + # load custom trained model for classification model + 
self.clf_model.load_weights(trained_clf_model_weights_path) + + self.class_dict = {v: k for k, v in np.load(npy_index_path)[()].items()} + + print("Loading params...Done!") + + def _build_model(self, weight_path, classes, image_size): + if weight_path: + base_model = self.clf_model_class_name( + weights=weight_path, + include_top=True, + input_shape=(image_size, image_size, 3) + ) + else: + base_model = self.clf_model_class_name( + include_top=True, + input_shape=(image_size, image_size, 3) + ) + base_model.layers.pop() + predictions = Dense(classes, activation='softmax')(base_model.layers[-1].output) + clf_model = Model(input=base_model.input, output=[predictions]) + return clf_model + + def _detection(self, img_path): + res = darknet.detect(self.det_net, self.det_meta, str.encode(img_path)) + return res + + def _classification(self, img): + width_height_tuple = (self.image_size, self.image_size) + if (img.size != width_height_tuple): + img = img.resize(width_height_tuple, Image.NEAREST) + x = img_to_array(img) + x /= 255 * 1. + x = x.reshape((1,) + x.shape) + y = self.clf_model.predict(x) + return y + + @staticmethod + def image_to_byte_array(query: List[str]): + query = np.asarray(query).astype(np.uint8) + image = Image.fromarray(query) + imgByteArr = io.BytesIO() + image.save(imgByteArr, format='JPEG') + imgByteArr = imgByteArr.getvalue() + return imgByteArr +
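
Reviewer note (not part of the patch): a minimal local smoke test sketching how the new FoodDetectionBase subclasses are driven. Assumptions: a hypothetical archive food101_model.zip sits next to the script and contains the files FoodDetection101 names above (yolov3-food.cfg, yolov3-food_final.weights, food.names, xception-F101-0.85.h5, food101.npy); the script is run from the repository root so darknet.py can find ./singa_auto/darknet/libdarknet_docker_cpu.so, which it loads at import time; and BaseModel needs no extra knobs for inference.

    import base64

    import numpy as np
    from PIL import Image

    from examples.models.image_object_detection.food101 import FoodDetection101

    # Build the model wrapper (no knobs passed in this sketch).
    model = FoodDetection101()

    # load_parameters() expects the whole model archive as a base64-encoded
    # string under the 'zip_file_base64' key; it unzips it to a temp dir and
    # loads the darknet detector plus the Keras classifier from there.
    with open("food101_model.zip", "rb") as f:  # hypothetical archive path
        params = {"zip_file_base64": base64.b64encode(f.read()).decode("utf-8")}
    model.load_parameters(params)

    # predict() takes a list of images as uint8-convertible RGB arrays; each
    # result carries per-box labels, probabilities and normalised detection boxes.
    query = np.asarray(Image.open("some_food_photo.jpg").convert("RGB"))  # hypothetical image
    results = model.predict([query])
    print(results[0]["predictions"])

This only exercises the inference path, since train(), evaluate() and dump_parameters() are left as no-ops in the base class.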