Cliprelatedwork #3090

Open · wants to merge 2 commits into base: develop
52 changes: 52 additions & 0 deletions deploy/configs/inference_CLIP.yaml
@@ -0,0 +1,52 @@
Global:
infer_imgs: "docs/images/inference_deployment/whl_demo.jpg"
texts: "text_prompts.txt"
inference_image_encoder_dir: "./inference_image"
inference_text_encoder_dir: "./inference_text"
batch_size: 1
mode: "text-to-image"
use_gpu: False
embedding_size: 512
enable_mkldnn: False
cpu_num_threads: 10
enable_benchmark: True
use_fp16: False
ir_optim: False # do not set to True: a known bug causes invalid initialization of the predictor
use_tensorrt: False
gpu_mem: 8000
enable_profile: False

PreProcess:
transform_ops:
- ResizeImage:
size: 224
- CropImage:
size: 224
- NormalizeImage:
scale: 1.0/255.0
mean: [0.48145466, 0.4578275, 0.40821073]
std: [0.26862954, 0.26130258, 0.27577711]
order: ""
channel_num: 3
- ToCHWImage:

PostProcess:
main_indicator: RamOutPut
RamOutPut:
language: "en"
tag_list: "ppcls/utils/ram/ram_tag_list.txt"
tag_list_chinese: "ppcls/utils/ram/ram_tag_list_chinese.txt"
ram_class_threshold_path: "ppcls/utils/ram/ram_tag_list_threshold.txt"


IndexProcess:
index_method: "HNSW32" # supported: HNSW32, IVF, Flat
image_index_dir: "./clip_image"
text_index_dir: "./clip_text"
index_operation: "new" # supported: "append", "remove", "new"
delimiter: "\t"
dist_type: "IP"
embedding_size: 512
batch_size: 1
return_k: 1
score_thres: 0.5
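
For orientation, the IndexProcess block describes a small retrieval gallery: the exported image and text encoders (under inference_image_encoder_dir and inference_text_encoder_dir) each produce a 512-dimensional embedding, the gallery is indexed (HNSW32, IVF, or Flat), and queries are scored by inner product (dist_type "IP"), keeping the best return_k match above score_thres. The snippet below is a minimal NumPy sketch of the text-to-image mode implied by these settings; the encoder functions are random placeholders rather than CLIP, and brute-force inner product stands in for a real index.

```python
import numpy as np

rng = np.random.default_rng(0)
embedding_size = 512  # matches Global.embedding_size / IndexProcess.embedding_size

def encode_images(n):
    # Placeholder for the exported image encoder: L2-normalized random vectors.
    feats = rng.normal(size=(n, embedding_size)).astype("float32")
    return feats / np.linalg.norm(feats, axis=1, keepdims=True)

def encode_text(prompt):
    # Placeholder for the exported text encoder.
    feat = rng.normal(size=(embedding_size,)).astype("float32")
    return feat / np.linalg.norm(feat)

# index_operation "new": build a fresh gallery of image embeddings.
gallery = encode_images(100)

# mode "text-to-image": score the gallery by inner product (dist_type "IP").
query = encode_text("a photo of a cat")
scores = gallery @ query

# Keep the top return_k hits, filtered by score_thres (real CLIP scores would be
# meaningful here; the random placeholders mostly fall below the threshold).
top = np.argsort(-scores)[:1]                                          # return_k: 1
hits = [(int(i), float(scores[i])) for i in top if scores[i] >= 0.5]   # score_thres: 0.5
print(hits)
```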
36 changes: 36 additions & 0 deletions deploy/configs/inference_ram.yaml
@@ -0,0 +1,36 @@
Global:
infer_imgs: "docs/images/inference_deployment/whl_demo.jpg"
inference_model_dir: "./inference"
batch_size: 1
use_gpu: False
enable_mkldnn: False
cpu_num_threads: 10
enable_benchmark: True
use_fp16: False
ir_optim: False # do not set to True: a known bug causes invalid initialization of the predictor
use_tensorrt: False
gpu_mem: 8000
enable_profile: False

PreProcess:
transform_ops:
- ResizeImage:
resize_short: 384
- CropImage:
size: 384
- NormalizeImage:
scale: 1.0/255.0
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
order: ""
channel_num: 3
- ToCHWImage:
PostProcess:
main_indicator: RamOutPut
RamOutPut:
language: "en"
tag_list: "ppcls/utils/ram/ram_tag_list.txt"
tag_list_chinese: "ppcls/utils/ram/ram_tag_list_chinese.txt"
ram_class_threshold_path: "ppcls/utils/ram/ram_tag_list_threshold.txt"
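
The transform_ops in the PreProcess section above are the usual classification preprocessing chain. As a rough reference, a NumPy/OpenCV equivalent might look like the sketch below; the exact interpolation and cropping behavior of the PaddleClas operators is assumed, not copied.

```python
import cv2
import numpy as np

def preprocess_ram(img_bgr):
    """Approximate the transform_ops above (assumed operator behavior)."""
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    # ResizeImage with resize_short: 384 scales the shorter side to 384, keeping aspect ratio.
    h, w = img.shape[:2]
    scale = 384.0 / min(h, w)
    img = cv2.resize(img, (int(round(w * scale)), int(round(h * scale))))
    # CropImage: 384 takes a 384x384 center crop.
    h, w = img.shape[:2]
    top, left = (h - 384) // 2, (w - 384) // 2
    img = img[top:top + 384, left:left + 384]
    # NormalizeImage: scale by 1/255, then subtract mean and divide by std per channel.
    img = img.astype("float32") / 255.0
    mean = np.array([0.485, 0.456, 0.406], dtype="float32")
    std = np.array([0.229, 0.224, 0.225], dtype="float32")
    img = (img - mean) / std
    # ToCHWImage: HWC -> CHW, ready for the predictor.
    return img.transpose(2, 0, 1)
```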


73 changes: 72 additions & 1 deletion deploy/python/postprocess.py
@@ -71,7 +71,7 @@ def __init__(self, func_list, main_indicator="Topk"):
def __call__(self, x, image_file=None):
rtn = None
for func in self.func_list:
-            tmp = func(x, image_file)
+            tmp = func(*x, image_file)
if type(func).__name__ in self.main_indicator:
rtn = tmp
return rtn
@@ -496,3 +496,74 @@ def __call__(self, batch_preds, file_names=None):
).astype(np.int8).tolist()
batch_res.append({"attributes": label_res, "output": pred_res})
return batch_res


class RamOutPut(object):
def __init__(self,
language="cn",
tag_list="",
tag_list_chinese="",
threshold=0.68,
delete_tag_index=[],
ram_class_threshold_path=""):
self.language = language
        assert tag_list and tag_list_chinese, "tag_list and tag_list_chinese must be provided"
self.tag_list = self.load_tag_list(tag_list)
self.delete_tag_index = delete_tag_index
self.tag_list_chinese = self.load_tag_list(tag_list_chinese)
self.num_class = len(self.tag_list)
self.class_threshold = paddle.ones([self.num_class]) * threshold
with open(ram_class_threshold_path, "r", encoding="utf-8") as f:
ram_class_threshold = [float(s.strip()) for s in f]
for key, value in enumerate(ram_class_threshold):
self.class_threshold[key] = value

def load_tag_list(self, tag_list_file):
with open(tag_list_file, "r", encoding="utf-8") as f:
tag_list = f.read().splitlines()
tag_list = np.array(tag_list)
return tag_list

def __call__(self, logits, bs, file_names=None):
batch_res = []
if bs is None:
if len(logits.shape) < 2:
bs = 1
else:
bs = logits.shape[0]
        logits = paddle.to_tensor(logits).reshape([bs, -1])
        targets = paddle.where(
            F.sigmoid(logits) > self.class_threshold,
            paddle.to_tensor([1.0]), paddle.zeros([self.num_class]))
targets = targets.reshape([bs, -1])
res = {}
tag = targets.cpu().numpy()
tag[:, self.delete_tag_index] = 0
tag_output = []
tag_output_chinese = []
for b in range(bs):
index = np.argwhere(tag[b] == 1)
token = self.tag_list[index].squeeze(axis=1)
tag_output.append(" | ".join(token))
token_chinese = self.tag_list_chinese[index].squeeze(axis=1)
tag_output_chinese.append(" | ".join(token_chinese))
res["cn"] = tag_output_chinese
res["en"] = tag_output
res["all"] = f"en : {tag_output}, cn: {tag_output_chinese}"

scores = F.sigmoid(logits).numpy()
class_ids_list = []
scores_list = []

for b in range(bs):
index = np.argwhere(tag[b] == 1)
class_ids_list.append(index.tolist())
scores_list.append(scores[b][index].tolist())

outputformat = {
"class_ids": class_ids_list,
"scores": scores_list,
"label_names": res[self.language]
}
batch_res.append(outputformat)
return outputformat
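
Tying this back to the PostProcess section of inference_ram.yaml: the dispatcher above unpacks the predictor output with func(*x, image_file), so RamOutPut is effectively called as ram_out(logits, bs). A minimal standalone usage sketch, assuming the tag-list files from the repo are available and using random logits in place of real RAM outputs:

```python
import numpy as np

# RamOutPut relies on paddle / paddle.nn.functional already being imported at
# module level in postprocess.py, as in the rest of the file.
post = RamOutPut(
    language="en",
    tag_list="ppcls/utils/ram/ram_tag_list.txt",
    tag_list_chinese="ppcls/utils/ram/ram_tag_list_chinese.txt",
    ram_class_threshold_path="ppcls/utils/ram/ram_tag_list_threshold.txt")

# Random stand-in for the RAM head output: one image, num_class logits.
logits = np.random.randn(1, post.num_class).astype("float32")
result = post(logits, bs=1)
print(result["label_names"])   # tags whose sigmoid score exceeds the per-class threshold
print(result["scores"])        # sigmoid scores for those tags
```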
68 changes: 68 additions & 0 deletions deploy/python/predict_multimodal.py
@@ -0,0 +1,68 @@
# copyright (c) 2023 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

import cv2
import numpy as np

from paddleclas.deploy.utils import logger, config
from paddleclas.deploy.utils.get_image_list import get_image_list
from paddleclas.deploy.python.predict_cls import ClsPredictor


def main(config):
cls_predictor = ClsPredictor(config)
image_list = get_image_list(config["Global"]["infer_imgs"])

batch_imgs = []
batch_names = []
cnt = 0
for idx, img_path in enumerate(image_list):
img = cv2.imread(img_path)
if img is None:
logger.warning(
"Image file failed to read and has been skipped. The path: {}".
format(img_path))
else:
img = img[:, :, ::-1]
batch_imgs.append(img)
img_name = os.path.basename(img_path)
batch_names.append(img_name)
cnt += 1

        if cnt % config["Global"]["batch_size"] == 0 or (idx + 1) == len(image_list):
if len(batch_imgs) == 0:
continue
batch_results = cls_predictor.predict(batch_imgs)
            # RamOutPut returns a single dict for the batch; with batch_size 1,
            # img_name is the image that was just processed.
            for result_key in batch_results:
                print(f"{img_name}-{result_key}: {batch_results[result_key]}")
batch_imgs = []
batch_names = []
if cls_predictor.benchmark:
cls_predictor.auto_logger.report()
return



if __name__ == "__main__":
args = config.parse_args()
config = config.get_config(args.config, overrides=args.override, show=True)
main(config)
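
The entry point follows the usual PaddleClas deploy pattern (parse_args plus get_config with override strings). Below is a hedged sketch of driving it programmatically instead of via the CLI; it assumes get_config accepts a YAML path and a list of "key=value" overrides, as the __main__ block suggests, and that the exported model sits where the config expects it.

```python
from paddleclas.deploy.utils import config

# Paths and override keys come from inference_ram.yaml; adjust to your export.
cfg = config.get_config(
    "deploy/configs/inference_ram.yaml",
    overrides=["Global.infer_imgs=docs/images/inference_deployment/whl_demo.jpg",
               "Global.use_gpu=False"],
    show=False)
main(cfg)
```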