diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..d2027d3
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+env/
+build/
+dist/
+*.log
+
+# pyenv
+.python-version
+
+# dotenv
+.env
+
+# virtualenv
+.venv/
+venv/
+ENV/
+
+# VSCode settings
+.vscode
+
+# IDEA files
+.idea
+
+# OSX dir files
+.DS_Store
+
+# Sublime Text settings
+*.sublime-workspace
+*.sublime-project
+
+# Models
+models/
\ No newline at end of file
diff --git a/README.md b/README.md
index 78c269d..ddf1507 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,59 @@
# virtual_try_on_use_deep_learning
-Virtual try-on implemented with deep learning, combining four models: human pose estimation, human parsing, geometric matching, and a GAN. It runs with only the opencv library.
-
-This program is fairly complex to implement, because it contains four deep learning models plus a custom layer, CorrelationLayer, that you have to implement yourself.
+> Virtual try-on implemented with deep learning, combining four models: human pose estimation, human parsing, geometric matching, and a GAN.
+
+### Implementation overview
+> This program is fairly complex to implement, because it contains four deep learning models plus a custom layer, CorrelationLayer, that you have to implement yourself.
However, when I wanted to use onnxruntime as the inference engine, it turned out that onnxruntime does not support custom layers, which counts as an advantage of opencv.
-The main script of this program is main.py. Download the model files from Baidu Netdisk,
-Link: https://pan.baidu.com/s/13Eic0aiMtCGY7iigjg71DQ
+### Model download and running
+This program runs with only the opencv library. The main script is main.py; download the model files from Baidu Netdisk at the address below:
+Link: https://pan.baidu.com/s/13Eic0aiMtCGY7iigjg71DQ
Extraction code: xsl5
-
-This program only has a Python version. I wrote a C++ program locally, but its output never matches the Python version's output,
+> This program only has a Python version. I wrote a C++ program locally, but its output never matches the Python version's output,
and I have not found the cause of this bug yet, so I am only publishing the Python program on github. After the program starts, you have to wait a few seconds before the result window pops up;
the slow running speed still needs to be optimized.
+
+ **Note: the text above is from the original author.**
+
+---
+### Addendum
+I have verified that the program runs successfully in the following environment:
+```txt
+python==3.8.0
+certifi==2021.10.8
+numpy==1.22.1
+opencv-python==4.5.1.48
+wincertstore==0.2
+```
+
+### Steps to run
+1. Clone this repository:
+```shell
+git clone https://github.com/GerogeLiu/virtual_try_on_use_deep_learning.git
+```
+2. Create a models directory in the project root, download the model files from Baidu Netdisk, and put them into the models directory, as shown below.
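+After downloading, the models directory should contain the following files (names taken from the default paths in main.py):
+```txt
+models/
+├── cp_vton_gmm.onnx
+├── cp_vton_tom.onnx
+├── lip_jppnet_384.pb
+├── openpose_pose_coco.prototxt
+└── openpose_pose_coco.caffemodel
+```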
+3. Create a virtual environment under anaconda (the name "vton" below is an arbitrary example):
+```shell
+# create the virtual environment
+conda create -n vton python=3.8.0
+```
+4. 安装必要的第三方库
+```shell
+# activate the virtual environment
+conda activate vton
+
+# install opencv (via the douban mirror)
+pip install opencv-python==4.5.1.48 -i https://pypi.douban.com/simple
+```
+5. Run main.py:
+```shell
+# change into the project directory
+cd your/path/to/virtual_try_on_use_deep_learning
+
+# run the script
+python main.py
+```
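+To try a different person or garment photo, main.py also accepts input flags (the values shown below are its defaults):
+```shell
+python main.py --input_image test_img/000074_0.jpg --input_cloth test_color/000010_1.jpg
+```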
+
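+### About the custom CorrelationLayer
+The implementation overview above mentions that CorrelationLayer has to be implemented by hand. Below is a condensed sketch of how a custom layer is registered with OpenCV's DNN module: the full implementation is the CorrelationLayer class in main.py, the forward math here is simplified for illustration, and the type name 'Correlation' is assumed to match the custom node in the .onnx graph.
+```python
+import cv2 as cv
+import numpy as np
+
+class CorrelationLayer(object):
+    def __init__(self, params, blobs):
+        super(CorrelationLayer, self).__init__()
+
+    # OpenCV calls this to size the output blob:
+    # one output channel per spatial position of the feature map.
+    def getMemoryShapes(self, inputs):
+        b, _, h, w = inputs[0]
+        return [[b, h * w, h, w]]
+
+    def forward(self, inputs):
+        feature_a, feature_b = inputs
+        b, c, h, w = feature_a.shape
+        # correlate every position of feature_a with every position of feature_b
+        flat_b = feature_b.reshape(b, c, h * w)
+        corr = np.einsum('bchw,bcn->bnhw', feature_a, flat_b)
+        return [np.ascontiguousarray(corr.astype(np.float32))]
+
+# register the layer type before cv.dnn.readNet loads the GMM .onnx model
+cv.dnn_registerLayer('Correlation', CorrelationLayer)
+```
+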
+### Result screenshot
+![output](output_img/output.PNG)
\ No newline at end of file
diff --git a/main.py b/main.py
index f20120a..3ed30f2 100644
--- a/main.py
+++ b/main.py
@@ -6,32 +6,42 @@
from common import findFile
from human_parsing import parse_human
-backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
-targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD, cv.dnn.DNN_TARGET_HDDL)
-
-parser = argparse.ArgumentParser(description='Use this script to run virtial try-on using CP-VTON', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-parser.add_argument('--input_image', type=str, default='test_img/000074_0.jpg', help='Path to image with person.')
+backends = (
+ cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE,
+ cv.dnn.DNN_BACKEND_OPENCV)
+targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD,
+ cv.dnn.DNN_TARGET_HDDL)
+
+parser = argparse.ArgumentParser(description='Use this script to run virtual try-on using CP-VTON',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--input_image', type=str, default='test_img/000074_0.jpg', help='Path to image with person.')
parser.add_argument('--input_cloth', type=str, default='test_color/000010_1.jpg', help='Path to target cloth image')
-parser.add_argument('--gmm_model', '-gmm', default='cp_vton_gmm.onnx', help='Path to Geometric Matching Module .onnx model.')
-parser.add_argument('--tom_model', '-tom', default='cp_vton_tom.onnx', help='Path to Try-On Module .onnx model.')
-parser.add_argument('--segmentation_model', default='lip_jppnet_384.pb', help='Path to cloth segmentation .pb model.')
-parser.add_argument('--openpose_proto', default='openpose_pose_coco.prototxt', help='Path to OpenPose .prototxt model was trained on COCO dataset.')
-parser.add_argument('--openpose_model', default='openpose_pose_coco.caffemodel', help='Path to OpenPose .caffemodel model was trained on COCO dataset.')
+# NOTE: model files are loaded from the models/ directory
+parser.add_argument('--gmm_model', '-gmm', default='models/cp_vton_gmm.onnx',
+ help='Path to Geometric Matching Module .onnx model.')
+parser.add_argument('--tom_model', '-tom', default='models/cp_vton_tom.onnx', help='Path to Try-On Module .onnx model.')
+parser.add_argument('--segmentation_model', default='models/lip_jppnet_384.pb', help='Path to human segmentation .pb model.')
+parser.add_argument('--openpose_proto', default='models/openpose_pose_coco.prototxt',
+                    help='Path to the OpenPose .prototxt model trained on the COCO dataset.')
+parser.add_argument('--openpose_model', default='models/openpose_pose_coco.caffemodel',
+                    help='Path to the OpenPose .caffemodel model trained on the COCO dataset.')
+
parser.add_argument('--backend', choices=backends, default=cv.dnn.DNN_BACKEND_DEFAULT, type=int,
help="Choose one of computation backends: "
- "%d: automatically (by default), "
- "%d: Halide language (http://halide-lang.org/), "
- "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
- "%d: OpenCV implementation" % backends)
+ "%d: automatically (by default), "
+ "%d: Halide language (http://halide-lang.org/), "
+ "%d: Intel's Deep Learning Inference Engine (https://software.intel.com/openvino-toolkit), "
+ "%d: OpenCV implementation" % backends)
parser.add_argument('--target', choices=targets, default=cv.dnn.DNN_TARGET_CPU, type=int,
help='Choose one of target computation devices: '
- '%d: CPU target (by default), '
- '%d: OpenCL, '
- '%d: OpenCL fp16 (half-float precision), '
- '%d: NCS2 VPU, '
- '%d: HDDL VPU' % targets)
+ '%d: CPU target (by default), '
+ '%d: OpenCL, '
+ '%d: OpenCL fp16 (half-float precision), '
+ '%d: NCS2 VPU, '
+ '%d: HDDL VPU' % targets)
args, _ = parser.parse_known_args()
+
def get_pose_map(image, proto_path, model_path, backend, target, height=256, width=192):
radius = 5
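+    # scale pixels to [0, 1] and resize to (width, height) for the OpenPose network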
inp = cv.dnn.blobFromImage(image, 1.0 / 255, (width, height))
@@ -58,17 +68,19 @@ def get_pose_map(image, proto_path, model_path, backend, target, height=256, wid
pose_map = pose_map.transpose(2, 0, 1)
return pose_map
+
class BilinearFilter(object):
"""
PIL bilinear resize implementation
image = image.resize((image_width // 16, image_height // 16), Image.BILINEAR)
"""
+
def _precompute_coeffs(self, inSize, outSize):
filterscale = max(1.0, inSize / outSize)
ksize = int(np.ceil(filterscale)) * 2 + 1
- kk = np.zeros(shape=(outSize * ksize, ), dtype=np.float32)
- bounds = np.empty(shape=(outSize * 2, ), dtype=np.int32)
+ kk = np.zeros(shape=(outSize * ksize,), dtype=np.float32)
+ bounds = np.empty(shape=(outSize * 2,), dtype=np.int32)
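+        # map each output pixel center into input coordinates (PIL's half-pixel convention)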
centers = (np.arange(outSize) + 0.5) * filterscale + 0.5
bounds[::2] = np.where(centers - filterscale < 0, 0, centers - filterscale)
@@ -80,7 +92,7 @@ def _precompute_coeffs(self, inSize, outSize):
point = points[xx]
bilinear = np.where(point < 1.0, 1.0 - abs(point), 0.0)
ww = np.sum(bilinear)
- kk[xx * ksize : xx * ksize + bilinear.size] = np.where(ww == 0.0, bilinear, bilinear / ww)
+ kk[xx * ksize: xx * ksize + bilinear.size] = np.where(ww == 0.0, bilinear, bilinear / ww)
return bounds, kk, ksize
def _resample_horizontal(self, out, img, ksize, bounds, kk):
@@ -88,20 +100,20 @@ def _resample_horizontal(self, out, img, ksize, bounds, kk):
for xx in range(0, out.shape[1]):
xmin = bounds[xx * 2 + 0]
xmax = bounds[xx * 2 + 1]
- k = kk[xx * ksize : xx * ksize + xmax]
- out[yy, xx] = np.round(np.sum(img[yy, xmin : xmin + xmax] * k))
+ k = kk[xx * ksize: xx * ksize + xmax]
+ out[yy, xx] = np.round(np.sum(img[yy, xmin: xmin + xmax] * k))
def _resample_vertical(self, out, img, ksize, bounds, kk):
for yy in range(0, out.shape[0]):
ymin = bounds[yy * 2 + 0]
ymax = bounds[yy * 2 + 1]
k = kk[yy * ksize: yy * ksize + ymax]
- out[yy] = np.round(np.sum(img[ymin : ymin + ymax, 0:out.shape[1]] * k[:, np.newaxis], axis=0))
+ out[yy] = np.round(np.sum(img[ymin: ymin + ymax, 0:out.shape[1]] * k[:, np.newaxis], axis=0))
def imaging_resample(self, img, xsize, ysize):
height, width = img.shape[0:2]
bounds_horiz, kk_horiz, ksize_horiz = self._precompute_coeffs(width, xsize)
- bounds_vert, kk_vert, ksize_vert = self._precompute_coeffs(height, ysize)
+ bounds_vert, kk_vert, ksize_vert = self._precompute_coeffs(height, ysize)
out_hor = np.empty((img.shape[0], xsize), dtype=np.uint8)
self._resample_horizontal(out_hor, img, ksize_horiz, bounds_horiz, kk_horiz)
@@ -109,6 +121,7 @@ def imaging_resample(self, img, xsize, ysize):
self._resample_vertical(out, out_hor, ksize_vert, bounds_vert, kk_vert)
return out
+
class CpVton(object):
def __init__(self, gmm_model, tom_model, backend, target):
super(CpVton, self).__init__()
@@ -119,28 +132,29 @@ def __init__(self, gmm_model, tom_model, backend, target):
self.tom_net.setPreferableBackend(backend)
self.tom_net.setPreferableTarget(target)
self.downsample = BilinearFilter()
+
def prepare_agnostic(self, segm_image, input_image, pose_map, height=256, width=192):
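+        # RGB palette of the LIP human-parsing labels, used to decode segm_image below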
palette = {
- 'Background' : (0, 0, 0),
- 'Hat' : (128, 0, 0),
- 'Hair' : (255, 0, 0),
- 'Glove' : (0, 85, 0),
- 'Sunglasses' : (170, 0, 51),
- 'UpperClothes' : (255, 85, 0),
- 'Dress' : (0, 0, 85),
- 'Coat' : (0, 119, 221),
- 'Socks' : (85, 85, 0),
- 'Pants' : (0, 85, 85),
- 'Jumpsuits' : (85, 51, 0),
- 'Scarf' : (52, 86, 128),
- 'Skirt' : (0, 128, 0),
- 'Face' : (0, 0, 255),
- 'Left-arm' : (51, 170, 221),
- 'Right-arm' : (0, 255, 255),
- 'Left-leg' : (85, 255, 170),
- 'Right-leg' : (170, 255, 85),
- 'Left-shoe' : (255, 255, 0),
- 'Right-shoe' : (255, 170, 0)
+ 'Background': (0, 0, 0),
+ 'Hat': (128, 0, 0),
+ 'Hair': (255, 0, 0),
+ 'Glove': (0, 85, 0),
+ 'Sunglasses': (170, 0, 51),
+ 'UpperClothes': (255, 85, 0),
+ 'Dress': (0, 0, 85),
+ 'Coat': (0, 119, 221),
+ 'Socks': (85, 85, 0),
+ 'Pants': (0, 85, 85),
+ 'Jumpsuits': (85, 51, 0),
+ 'Scarf': (52, 86, 128),
+ 'Skirt': (0, 128, 0),
+ 'Face': (0, 0, 255),
+ 'Left-arm': (51, 170, 221),
+ 'Right-arm': (0, 255, 255),
+ 'Left-leg': (85, 255, 170),
+ 'Right-leg': (170, 255, 85),
+ 'Left-shoe': (255, 255, 0),
+ 'Right-shoe': (255, 170, 0)
}
color2label = {val: key for key, val in palette.items()}
head_labels = ['Hat', 'Hair', 'Sunglasses', 'Face', 'Pants', 'Skirt']
@@ -157,7 +171,8 @@ def prepare_agnostic(self, segm_image, input_image, pose_map, height=256, width=
if color2label[pixel] != 'Background':
pose_shape[r, c] = 255
- input_image = cv.dnn.blobFromImage(input_image, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5), swapRB=True)
+ input_image = cv.dnn.blobFromImage(input_image, 1.0 / 127.5, (width, height), mean=(127.5, 127.5, 127.5),
+ swapRB=True)
input_image = input_image.squeeze(0)
img_head = input_image * phead - (1 - phead)
down = self.downsample.imaging_resample(pose_shape, width // 16, height // 16)
@@ -223,8 +238,8 @@ def _prepare_to_transform(self, out_h=256, out_w=192, grid_size=5):
N = grid_size ** 2
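+        # lay out the N = grid_size**2 TPS control points on a regular grid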
P_Y, P_X = np.meshgrid(axis_coords, axis_coords)
- P_X = np.reshape(P_X,(-1, 1))
- P_Y = np.reshape(P_Y,(-1, 1))
+ P_X = np.reshape(P_X, (-1, 1))
+ P_Y = np.reshape(P_Y, (-1, 1))
P_X = np.expand_dims(np.expand_dims(np.expand_dims(P_X, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0)
P_Y = np.expand_dims(np.expand_dims(np.expand_dims(P_Y, axis=2), axis=3), axis=4).transpose(4, 1, 2, 3, 0)
@@ -263,8 +278,8 @@ def _apply_transformation(self, theta, points, N, P_X, P_Y):
P_X = self._expand_torch(P_X, (1, points_h, points_w, 1, N))
P_Y = self._expand_torch(P_Y, (1, points_h, points_w, 1, N))
- W_X = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_X
- W_Y = self._expand_torch(Li[:,:N,:N], (batch_size, N, N)) @ Q_Y
+ W_X = self._expand_torch(Li[:, :N, :N], (batch_size, N, N)) @ Q_X
+ W_Y = self._expand_torch(Li[:, :N, :N], (batch_size, N, N)) @ Q_Y
W_X = np.expand_dims(np.expand_dims(W_X, axis=3), axis=4).transpose(0, 4, 2, 3, 1)
W_X = np.repeat(W_X, points_h, axis=1)
@@ -302,22 +317,22 @@ def _apply_transformation(self, theta, points, N, P_X, P_Y):
dist_squared[dist_squared == 0] = 1
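+        # thin-plate spline radial basis U(r) = r^2 * log(r^2); zeros were patched above to avoid log(0)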
U = np.multiply(dist_squared, np.log(dist_squared))
- points_X_batch = np.expand_dims(points[:,:,:,0], axis=3)
- points_Y_batch = np.expand_dims(points[:,:,:,1], axis=3)
+ points_X_batch = np.expand_dims(points[:, :, :, 0], axis=3)
+ points_Y_batch = np.expand_dims(points[:, :, :, 1], axis=3)
if points_b == 1:
- points_X_batch = self._expand_torch(points_X_batch, (batch_size, ) + points_X_batch.shape[1:])
- points_Y_batch = self._expand_torch(points_Y_batch, (batch_size, ) + points_Y_batch.shape[1:])
+ points_X_batch = self._expand_torch(points_X_batch, (batch_size,) + points_X_batch.shape[1:])
+ points_Y_batch = self._expand_torch(points_Y_batch, (batch_size,) + points_Y_batch.shape[1:])
- points_X_prime = A_X[:,:,:,:,0]+ \
- np.multiply(A_X[:,:,:,:,1], points_X_batch) + \
- np.multiply(A_X[:,:,:,:,2], points_Y_batch) + \
- np.sum(np.multiply(W_X, self._expand_torch(U, W_X.shape)), 4)
+ points_X_prime = A_X[:, :, :, :, 0] + \
+ np.multiply(A_X[:, :, :, :, 1], points_X_batch) + \
+ np.multiply(A_X[:, :, :, :, 2], points_Y_batch) + \
+ np.sum(np.multiply(W_X, self._expand_torch(U, W_X.shape)), 4)
- points_Y_prime = A_Y[:,:,:,:,0]+ \
- np.multiply(A_Y[:,:,:,:,1], points_X_batch) + \
- np.multiply(A_Y[:,:,:,:,2], points_Y_batch) + \
- np.sum(np.multiply(W_Y, self._expand_torch(U, W_Y.shape)), 4)
+ points_Y_prime = A_Y[:, :, :, :, 0] + \
+ np.multiply(A_Y[:, :, :, :, 1], points_X_batch) + \
+ np.multiply(A_Y[:, :, :, :, 2], points_Y_batch) + \
+ np.sum(np.multiply(W_Y, self._expand_torch(U, W_Y.shape)), 4)
return np.concatenate((points_X_prime, points_Y_prime), 3)
@@ -327,7 +342,7 @@ def _generate_grid(self, theta):
return warped_grid
def _bilinear_sampler(self, img, grid):
- x, y = grid[:,:,:,0], grid[:,:,:,1]
+ x, y = grid[:, :, :, 0], grid[:, :, :, 1]
H = img.shape[2]
W = img.shape[3]
@@ -346,11 +361,11 @@ def _bilinear_sampler(self, img, grid):
# calculate deltas
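+        # each weight is the area of the sub-rectangle opposite its corner (standard bilinear interpolation)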
wa = (x1 - x) * (y1 - y)
- wb = (x1 - x) * (y - y0)
+ wb = (x1 - x) * (y - y0)
wc = (x - x0) * (y1 - y)
- wd = (x - x0) * (y - y0)
+ wd = (x - x0) * (y - y0)
- # clip to range [0, H-1/W-1] to not violate img boundaries
+        # clip x to [0, W-1] and y to [0, H-1] so sampling stays inside the input image
x0 = np.clip(x0, 0, max_x)
x1 = np.clip(x1, 0, max_x)
y0 = np.clip(y0, 0, max_y)
@@ -369,9 +384,10 @@ def _bilinear_sampler(self, img, grid):
wd = np.expand_dims(wd, axis=0)
# compute output
- out = wa*Ia + wb*Ib + wc*Ic + wd*Id
+ out = wa * Ia + wb * Ib + wc * Ic + wd * Id
return out
+
class CorrelationLayer(object):
def __init__(self, params, blobs):
super(CorrelationLayer, self).__init__()
@@ -396,6 +412,7 @@ def forward(self, inputs):
correlation_tensor = np.ascontiguousarray(correlation_tensor)
return [correlation_tensor]
+
if __name__ == "__main__":
if not os.path.isfile(args.gmm_model):
raise OSError("GMM model not exist")
@@ -448,4 +465,4 @@ def forward(self, inputs):
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cv.imshow(winName, output)
cv.waitKey(0)
- cv.destroyAllWindows()
\ No newline at end of file
+ cv.destroyAllWindows()
diff --git a/output_img/output.PNG b/output_img/output.PNG
new file mode 100644
index 0000000..af4e486
Binary files /dev/null and b/output_img/output.PNG differ
diff --git a/requirement.txt b/requirement.txt
new file mode 100644
index 0000000..1a957d5
Binary files /dev/null and b/requirement.txt differ