Cliprelatedwork #3090

Open · wants to merge 2 commits into base: develop
52 changes: 52 additions & 0 deletions deploy/configs/inference_CLIP.yaml
@@ -0,0 +1,52 @@
Global:
infer_imgs: "docs/images/inference_deployment/whl_demo.jpg"
texts: "text_prompts.txt"
inference_image_encoder_dir: "./inference_image"
inference_text_encoder_dir: "./inference_text"
batch_size: 1
mode: "text-to-image"
use_gpu: False
embedding_size: 512
enable_mkldnn: False
cpu_num_threads: 10
enable_benchmark: True
use_fp16: False
ir_optim: False # do not set to True: a known bug causes invalid initialization of the predictor
use_tensorrt: False
gpu_mem: 8000
enable_profile: False

PreProcess:
transform_ops:
- ResizeImage:
size: 224
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.48145466, 0.4578275, 0.40821073]
std: [0.26862954, 0.26130258, 0.27577711]
order: ""
channel_num: 3
- ToCHWImage:

PostProcess:
main_indicator: RamOutPut
RamOutPut:
language: "en"
tag_list: "ppcls/utils/ram/ram_tag_list.txt"
tag_list_chinese: "ppcls/utils/ram/ram_tag_list_chinese.txt"
ram_class_threshold_path: "ppcls/utils/ram/ram_tag_list_threshold.txt"


IndexProcess:
index_method: "HNSW32" # supported: HNSW32, IVF, Flat
image_index_dir: "./clip_image"
text_index_dir: "./clip_text"
index_operation: "new" # supported: "append", "remove", "new"
delimiter: "\t"
dist_type: "IP"
embedding_size: 512
batch_size: 1
return_k: 1
score_thres: 0.5
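
For orientation, the IndexProcess block describes a small retrieval gallery: the exported image and text encoders (under inference_image_encoder_dir and inference_text_encoder_dir) each produce a 512-dimensional embedding, the gallery is indexed (HNSW32, IVF, or Flat), and queries are scored by inner product (dist_type "IP"), keeping the best return_k match above score_thres. The snippet below is a minimal NumPy sketch of the text-to-image mode implied by these settings; the encoder functions are random placeholders rather than CLIP, and brute-force inner product stands in for a real index.

```python
import numpy as np

rng = np.random.default_rng(0)
embedding_size = 512  # matches Global.embedding_size / IndexProcess.embedding_size

def encode_images(n):
    # Placeholder for the exported image encoder: L2-normalized random vectors.
    feats = rng.normal(size=(n, embedding_size)).astype("float32")
    return feats / np.linalg.norm(feats, axis=1, keepdims=True)

def encode_text(prompt):
    # Placeholder for the exported text encoder.
    feat = rng.normal(size=(embedding_size,)).astype("float32")
    return feat / np.linalg.norm(feat)

# index_operation "new": build a fresh gallery of image embeddings.
gallery = encode_images(100)

# mode "text-to-image": score the gallery by inner product (dist_type "IP").
query = encode_text("a photo of a cat")
scores = gallery @ query

# Keep the top return_k hits, filtered by score_thres (real CLIP scores would be
# meaningful here; the random placeholders mostly fall below the threshold).
top = np.argsort(-scores)[:1]                                          # return_k: 1
hits = [(int(i), float(scores[i])) for i in top if scores[i] >= 0.5]   # score_thres: 0.5
print(hits)
```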
36 changes: 36 additions & 0 deletions deploy/configs/inference_ram.yaml
@@ -0,0 +1,36 @@
Global:
infer_imgs: "docs/images/inference_deployment/whl_demo.jpg"
inference_model_dir: "./inference"
batch_size: 1
use_gpu: False
enable_mkldnn: False
cpu_num_threads: 10
enable_benchmark: True
use_fp16: False
ir_optim: False # do not set to True: a known bug causes invalid initialization of the predictor
use_tensorrt: False
gpu_mem: 8000
enable_profile: False

PreProcess:
transform_ops:
- ResizeImage:
resize_short: 384
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ""
channel_num: 3
- ToCHWImage:
PostProcess:
main_indicator: RamOutPut
RamOutPut:
language: "en"
tag_list: "ppcls/utils/ram/ram_tag_list.txt"
tag_list_chinese: "ppcls/utils/ram/ram_tag_list_chinese.txt"
ram_class_threshold_path: "ppcls/utils/ram/ram_tag_list_threshold.txt"
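
The transform_ops in the PreProcess section above are the usual classification preprocessing chain. As a rough reference, a NumPy/OpenCV equivalent might look like the sketch below; the exact interpolation and cropping behavior of the PaddleClas operators is assumed, not copied.

```python
import cv2
import numpy as np

def preprocess_ram(img_bgr):
    """Approximate the transform_ops above (assumed operator behavior)."""
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    # ResizeImage with resize_short: 384 scales the shorter side to 384, keeping aspect ratio.
    h, w = img.shape[:2]
    scale = 384.0 / min(h, w)
    img = cv2.resize(img, (int(round(w * scale)), int(round(h * scale))))
    # CropImage: 384 takes a 384x384 center crop.
    h, w = img.shape[:2]
    top, left = (h - 384) // 2, (w - 384) // 2
    img = img[top:top + 384, left:left + 384]
    # NormalizeImage: scale by 1/255, then subtract mean and divide by std per channel.
    img = img.astype("float32") / 255.0
    mean = np.array([0.485, 0.456, 0.406], dtype="float32")
    std = np.array([0.229, 0.224, 0.225], dtype="float32")
    img = (img - mean) / std
    # ToCHWImage: HWC -> CHW, ready for the predictor.
    return img.transpose(2, 0, 1)
```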


73 changes: 72 additions & 1 deletion deploy/python/postprocess.py
@@ -71,7 +71,7 @@ def __init__(self, func_list, main_indicator="Topk"):
def __call__(self, x, image_file=None):
rtn = None
for func in self.func_list:
-            tmp = func(x, image_file)
+            tmp = func(*x, image_file)
if type(func).__name__ in self.main_indicator:
rtn = tmp
return rtn
@@ -496,3 +496,74 @@ def __call__(self, batch_preds, file_names=None):
).astype(np.int8).tolist()
batch_res.append({"attributes": label_res, "output": pred_res})
return batch_res


class RamOutPut(object):
def __init__(self,
language="cn",
tag_list="",
tag_list_chinese="",
threshold=0.68,
delete_tag_index=[],
ram_class_threshold_path=""):
self.language = language
        assert tag_list and tag_list_chinese, "tag_list and tag_list_chinese must be provided"
self.tag_list = self.load_tag_list(tag_list)
self.delete_tag_index = delete_tag_index
self.tag_list_chinese = self.load_tag_list(tag_list_chinese)
self.num_class = len(self.tag_list)
self.class_threshold = paddle.ones([self.num_class]) * threshold
with open(ram_class_threshold_path, "r", encoding="utf-8") as f:
ram_class_threshold = [float(s.strip()) for s in f]
for key, value in enumerate(ram_class_threshold):
self.class_threshold[key] = value

def load_tag_list(self, tag_list_file):
with open(tag_list_file, "r", encoding="utf-8") as f:
tag_list = f.read().splitlines()
tag_list = np.array(tag_list)
return tag_list

def __call__(self, logits, bs, file_names=None):
batch_res = []
if bs is None:
if len(logits.shape) < 2:
bs = 1
else:
bs = logits.shape[0]
        logits = paddle.to_tensor(logits).reshape([bs, -1])
        targets = paddle.where(
            F.sigmoid(logits) > self.class_threshold,
            paddle.to_tensor([1.0]), paddle.zeros([self.num_class]))
targets = targets.reshape([bs, -1])
res = {}
tag = targets.cpu().numpy()
tag[:, self.delete_tag_index] = 0
tag_output = []
tag_output_chinese = []
for b in range(bs):
index = np.argwhere(tag[b] == 1)
token = self.tag_list[index].squeeze(axis=1)
tag_output.append(" | ".join(token))
token_chinese = self.tag_list_chinese[index].squeeze(axis=1)
tag_output_chinese.append(" | ".join(token_chinese))
res["cn"] = tag_output_chinese
res["en"] = tag_output
res["all"] = f"en : {tag_output}, cn: {tag_output_chinese}"

scores = F.sigmoid(logits).numpy()
class_ids_list = []
scores_list = []

for b in range(bs):
index = np.argwhere(tag[b] == 1)
class_ids_list.append(index.tolist())
scores_list.append(scores[b][index].tolist())

outputformat = {
"class_ids": class_ids_list,
"scores": scores_list,
"label_names": res[self.language]
}
batch_res.append(outputformat)
return outputformat
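
Tying this back to the PostProcess section of inference_ram.yaml: the dispatcher above unpacks the predictor output with func(*x, image_file), so RamOutPut is effectively called as ram_out(logits, bs). A minimal standalone usage sketch, assuming the tag-list files from the repo are available and using random logits in place of real RAM outputs:

```python
import numpy as np

# RamOutPut relies on paddle / paddle.nn.functional already being imported at
# module level in postprocess.py, as in the rest of the file.
post = RamOutPut(
    language="en",
    tag_list="ppcls/utils/ram/ram_tag_list.txt",
    tag_list_chinese="ppcls/utils/ram/ram_tag_list_chinese.txt",
    ram_class_threshold_path="ppcls/utils/ram/ram_tag_list_threshold.txt")

# Random stand-in for the RAM head output: one image, num_class logits.
logits = np.random.randn(1, post.num_class).astype("float32")
result = post(logits, bs=1)
print(result["label_names"])   # tags whose sigmoid score exceeds the per-class threshold
print(result["scores"])        # sigmoid scores for those tags
```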
68 changes: 68 additions & 0 deletions deploy/python/predict_multimodal.py
@@ -0,0 +1,68 @@
# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import cv2
import numpy as np

from paddleclas.deploy.utils import logger, config
from paddleclas.deploy.utils.get_image_list import get_image_list
from paddleclas.deploy.python.predict_cls import ClsPredictor


def main(config):
cls_predictor = ClsPredictor(config)
image_list = get_image_list(config["Global"]["infer_imgs"])

batch_imgs = []
batch_names = []
cnt = 0
for idx, img_path in enumerate(image_list):
img = cv2.imread(img_path)
if img is None:
logger.warning(
"Image file failed to read and has been skipped. The path: {}".
format(img_path))
else:
img = img[:, :, ::-1]
batch_imgs.append(img)
img_name = os.path.basename(img_path)
batch_names.append(img_name)
cnt += 1

        if cnt % config["Global"]["batch_size"] == 0 or (idx + 1) == len(image_list):
if len(batch_imgs) == 0:
continue
batch_results = cls_predictor.predict(batch_imgs)
            # RamOutPut returns a single dict for the batch; with batch_size 1,
            # img_name is the image that was just processed.
            for result_key in batch_results:
                print(f"{img_name}-{result_key}: {batch_results[result_key]}")
batch_imgs = []
batch_names = []
if cls_predictor.benchmark:
cls_predictor.auto_logger.report()
return



if __name__ == "__main__":
args = config.parse_args()
config = config.get_config(args.config, overrides=args.override, show=True)
main(config)
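
The entry point follows the usual PaddleClas deploy pattern (parse_args plus get_config with override strings). Below is a hedged sketch of driving it programmatically instead of via the CLI; it assumes get_config accepts a YAML path and a list of "key=value" overrides, as the __main__ block suggests, and that the exported model sits where the config expects it.

```python
from paddleclas.deploy.utils import config

# Paths and override keys come from inference_ram.yaml; adjust to your export.
cfg = config.get_config(
    "deploy/configs/inference_ram.yaml",
    overrides=["Global.infer_imgs=docs/images/inference_deployment/whl_demo.jpg",
               "Global.use_gpu=False"],
    show=False)
main(cfg)
```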