efficientSAM.py
import numpy as np
import cv2 as cv


class EfficientSAM:
    def __init__(self, modelPath, backendId=0, targetId=0):
        self._modelPath = modelPath
        self._backendId = backendId
        self._targetId = targetId
        self._model = cv.dnn.readNet(self._modelPath)
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)
        # 3 inputs
        self._inputNames = ["batched_images", "batched_point_coords", "batched_point_labels"]
        self._outputNames = ["output_masks"]  # actual output layer name
        self._currentInputSize = None
        self._inputSize = [640, 640]  # input size for the model

    @property
    def name(self):
        return self.__class__.__name__

    def setBackendAndTarget(self, backendId, targetId):
        self._backendId = backendId
        self._targetId = targetId
        self._model.setPreferableBackend(self._backendId)
        self._model.setPreferableTarget(self._targetId)

    def _preprocess(self, image, points, labels):
        image = cv.cvtColor(image, cv.COLOR_BGR2RGB)
        # record the original input image size as (width, height)
        self._currentInputSize = (image.shape[1], image.shape[0])
        image = cv.resize(image, self._inputSize)
        image = image.astype(np.float32, copy=False) / 255.0
        # map the prompt points from the original image space into the 640x640 model space
        for p in points:
            p[0] = int(p[0] * self._inputSize[0] / self._currentInputSize[0])
            p[1] = int(p[1] * self._inputSize[1] / self._currentInputSize[1])
        image_blob = cv.dnn.blobFromImage(image)
        points_blob = np.array([[points]], dtype=np.float32)
        # labels are passed as float32 so the blob depth matches what cv.dnn expects
        labels_blob = np.array([[[labels]]], dtype=np.float32)
        return image_blob, points_blob, labels_blob

    def infer(self, image, points, labels):
        # Preprocess
        imageBlob, pointsBlob, labelsBlob = self._preprocess(image, points, labels)

        # Forward
        self._model.setInput(imageBlob, self._inputNames[0])
        self._model.setInput(pointsBlob, self._inputNames[1])
        self._model.setInput(labelsBlob, self._inputNames[2])
        outputBlob = self._model.forward()

        # Postprocess
        results = self._postprocess(outputBlob)

        return results

    def _postprocess(self, outputBlob):
        # take the first mask of the first query and treat values >= 0 as foreground
        mask = outputBlob[0, 0, 0, :, :] >= 0
        mask_uint8 = (mask * 255).astype(np.uint8)
        # resize the mask back to the original image size (INTER_CUBIC == 2 in the original call)
        mask_uint8 = cv.resize(mask_uint8, dsize=self._currentInputSize, interpolation=cv.INTER_CUBIC)
        return mask_uint8
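
A minimal usage sketch follows; it is not part of the original file. The model file name "efficient_sam_vitt.onnx" and the image path "example.jpg" are placeholder assumptions, and the prompt coordinates are arbitrary. It instantiates the class, prompts it with a single positive point, and overlays the returned mask on the input image.

if __name__ == "__main__":
    # Hypothetical file names; substitute your own EfficientSAM ONNX export and test image.
    model = EfficientSAM("efficient_sam_vitt.onnx")
    image = cv.imread("example.jpg")

    # A single foreground point prompt at pixel (x, y); label 1 marks a positive point.
    points = [[320, 240]]
    labels = [1]

    mask = model.infer(image, points, labels)

    # Overlay the binary mask in green for a quick visual check.
    overlay = image.copy()
    overlay[mask > 0] = (0, 255, 0)
    blended = cv.addWeighted(image, 0.6, overlay, 0.4, 0)
    cv.imwrite("segmented.jpg", blended)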