Entangled 3D joints in webcam quick test #13

valillon · 2020-03-06T10:02:27Z

First thing, thank you for your work and releasing the code.
I adapted it to make it work with a webcam.

In the above example, while OpenPose seems to perform pretty decently, the 3D hand estimation tends to entangle some joints, especially the thumb joints, which look quite rigid. Have you ever observed that outcome before?

I am also attaching the test code, since I might be doing something wrong here.
BTW, I'm especially curious why the joints are arranged in triplets with no apparent relation to the fingers.

Best

from __future__ import division
import torch
from torch.autograd import Variable
from model import resnet34_Mano
# from torch.utils import data
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import time
import cv2
import os
import PyOpenPose as OP


# -------------------------------------------------------------------------------
# Capture

# Open the default webcam (device 0) and grab one frame so the frame
# geometry is known before anything else is configured.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open the webcam (device 0)")

# cap.read() signals failure through its return values rather than by
# raising, so check them explicitly instead of crashing on frame.shape.
ret, frame = cap.read()
if not ret or frame is None:
    cap.release()
    raise RuntimeError("Could not read an initial frame from the webcam")

imgSize = list(frame.shape)     # shape of the first frame as a list
outSize = imgSize[1::-1]        # first two entries of imgSize, reversed
target_size = 240               # side length (pixels) of the square crop used downstream

# -------------------------------------------------------------------------------
# OpenPose

# Root of the OpenPose installation, taken from the environment
# (a missing variable raises KeyError right here).
OPENPOSE_ROOT = os.environ["OPENPOSE_ROOT"]
openpose_models_dir = os.sep.join([OPENPOSE_ROOT, "models", ""])

# Hand heat-map channel indices grouped three at a time. The main loop packs
# each triplet into one 3-channel image; together the seven triplets cover
# every index 0..20 exactly once.
hand_indexes = [
    [0, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [17, 18, 19],
    [20, 13, 14],
    [15, 16, 1],
    [2, 3, 4],
]

# Square working resolution used both as OpenPose input and output size.
input_size = output_size = (target_size, target_size)

# Feature switches handed to OpenPose; download_heatmaps is also read by
# the main loop to decide whether heat maps are appended to the network input.
download_heatmaps = True
with_face, with_hands = False, True

# Hand bounding box spanning the whole target_size x target_size crop.
handBB = [0, 0, target_size, target_size]

# Candidate network resolutions; only the large one is passed below.
netres_large = (1312, 736)
netres_medium = (656, 368)

# NOTE(review): the meaning of each positional argument (in particular the
# literal 0) is inferred from the variable names only -- confirm against
# the PyOpenPose constructor signature.
op = OP.OpenPose(
    netres_large,
    input_size,
    output_size,
    "COCO",
    openpose_models_dir,
    0,
    download_heatmaps,
    OP.OpenPose.ScaleMode.ZeroToOne,
    with_face,
    with_hands,
)

# -------------------------------------------------------------------------------

# One colour triple per joint index drawn by the main loop (21 in total):
# the wrist first, then four shades per finger running from the base outwards.
# The finger labels follow the original author's notes; the joint ordering of
# the model output itself is not verified here.
_joint_shades = (0, 60, 120, 180)
joint_colors = (
    [[0, 0, 0]]                                           # wrist
    + [[shade, 255, 255] for shade in _joint_shades]      # index 1 (base) .. 4
    + [[shade, 255, shade] for shade in _joint_shades]    # middle ("heart") 1 (base) .. 4
    + [[255, shade, 255] for shade in _joint_shades]      # little 1 (base) .. 4
    + [[255, shade, shade] for shade in _joint_shades]    # ring 1 (base) .. 4
    + [[shade, shade, 255] for shade in _joint_shades]    # thumb 1 (base) .. 4
)


# -------------------------------------------------------------------------------

# Network input mode; passed to resnet34_Mano and used to select the
# checkpoint file name ('data/model-<input_option>.pth'):
#   0 -> use the image only as input
#   1 -> use the image and the joint heat maps as input
input_option = 1

# template = open('data/template.obj')
# content = template.readlines()
# template.close()


# -------------------------------------------------------------------------------
# Model

# Build the network for the selected input mode, wrap it in DataParallel
# before loading the state dict, then load the checkpoint that matches
# input_option and switch to evaluation mode.
hand_network = resnet34_Mano(input_option=input_option)
model = torch.nn.DataParallel(hand_network)
checkpoint_path = 'data/model-' + str(input_option) + '.pth'
model.load_state_dict(torch.load(checkpoint_path))
model.eval()

# -------------------------------------------------------------------------------
# Main Loop

# Per-frame pipeline: grab a webcam frame, crop/resize it to a square,
# run OpenPose hand detection, feed the image plus hand heat maps to the
# 3D hand network and show OpenPose render | heat maps | predicted 2D joints.
#
# Keys: Esc quits; '0' shows all heat-map triplets, '1'..'7' shows one triplet.

finger = 0                      # 0 = all triplets, 1..7 = only that triplet
actual_fps = 0
paused = False                  # never toggled in this script; selects the waitKey delay
delay = {True: 0, False: 1}     # waitKey delay in ms (0 blocks until a key is pressed)

# Loop-invariant rectangle array handed to detectHands: handBB followed by
# four zeros, shaped (1, 8).
# NOTE(review): presumably one rectangle per hand -- confirm against PyOpenPose.
hand_rects = np.array(handBB + [0, 0, 0, 0], dtype=np.int32).reshape((1, 8))

try:
    while True:

        start_time = time.time()

        # Capture. cap.read() reports failure through its return values,
        # it does not raise, so test them before touching the frame.
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Failed to grab")
            break
        frame = cv2.flip(frame, 1)      # mirror horizontally

        # Formatting: centred square crop, then resize to the working size.
        # NOTE(review): the crop assumes a landscape frame (w >= h).
        h, w, _ = frame.shape
        offset = int((w - h) * 0.5)
        bgr = frame[:h, offset:offset + h]
        # The interpolation flag must be passed by keyword: the third
        # positional parameter of cv2.resize is the destination array.
        bgr = cv2.resize(bgr, (target_size, target_size),
                         interpolation=cv2.INTER_LINEAR)

        # 2D joint detection
        t = time.time()
        op.detectHands(bgr, hand_rects)
        t = time.time() - t
        op_fps = 1.0 / max(t, 1e-6)     # guard against a zero-length interval

        res = op.render(bgr)
        cv2.putText(res, 'OpenPose Fps = %0.1f' % op_fps, (20, 20), 0, 0.5, (255, 255, 255))

        # Float accumulator for the heat-map visualisation (same shape as bgr).
        heatmap = np.zeros(bgr.shape)
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        inputs = [ToTensor()(rgb)]

        if download_heatmaps:

            left_hands, right_hands = op.getHandHeatmaps()
            hands = right_hands

            for j, l in enumerate(hand_indexes):

                # Horizontally reversed views of the three channels of this triplet.
                hm0 = hands[0, l[0], :, ::-1]
                hm1 = hands[0, l[1], :, ::-1]
                hm2 = hands[0, l[2], :, ::-1]

                # Visualisation: accumulate the selected triplet(s) channel-wise.
                if j + 1 == finger or finger == 0:
                    heatmap[:, :, 0] += hm0 * 255
                    heatmap[:, :, 1] += hm1 * 255
                    heatmap[:, :, 2] += hm2 * 255

                # Network input: fliplr undoes the reversal above; the three
                # channels are stacked into one H x W x 3 uint8 image.
                handmap = np.dstack((
                    np.fliplr(hm0 * 255).astype(np.uint8),
                    np.fliplr(hm1 * 255).astype(np.uint8),
                    np.fliplr(hm2 * 255).astype(np.uint8),
                ))
                # handmap = cv2.resize(handmap, output_size, interpolation=cv2.INTER_LINEAR)   # Same as input image
                inputs.append(ToTensor()(handmap))

        # Image channels followed by all heat-map channels, along dim 0.
        inputs = torch.cat(inputs, dim=0)

        # 3D hand detection
        out1, out2 = model(Variable(inputs.cuda()).unsqueeze(0))

        # print(out1.shape)
        # print(out2.shape)

        # Display 2D joints on a white canvas: out1[0, 2*i] and out1[0, 2*i+1]
        # are used as the (x, y) pixel position of joint i.
        canvas = np.full_like(bgr, 255)

        for i in range(21):
            joint_x = int(float(out1[0, 2 * i]))
            joint_y = int(float(out1[0, 2 * i + 1]))
            cv2.circle(canvas, (joint_x, joint_y), radius=2, color=joint_colors[i], thickness=2)

        actual_fps = 1.0 / max(time.time() - start_time, 1e-6)
        cv2.putText(canvas, 'Total Fps = %0.1f' % actual_fps, (20, 20), 0, 0.5, (0, 0, 0))

        cv2.putText(heatmap, 'Finger = %d' % finger, (20, 20), 0, 0.5, (255, 255, 255))
        # Clip before the uint8 cast: summed heat maps can exceed 255 and
        # would otherwise wrap around in the displayed image.
        heatmap_u8 = np.clip(heatmap, 0, 255).astype(np.uint8)
        composite = np.concatenate((res, heatmap_u8), axis=1)
        composite = np.concatenate((composite, canvas), axis=1)
        cv2.imshow('3D Hand Pose', composite)

        # Save 3D mesh
        # file1 = open('data/out/'+str(i)+'.obj','w')
        # for j in xrange(778):
        #     file1.write("v %f %f %f\n"%(out2[0,21+j,0],-out2[0,21+j,1],-out2[0,21+j,2]))
        # for j,x in enumerate(content):
        #     a = x[:len(x)-1].split(" ")
        #     if (a[0] == 'f'):
        #         file1.write(x)
        # file1.close()

        # Mask once so both key checks see the same low-byte key code.
        key = cv2.waitKey(delay[paused]) & 0xFF
        if key == 27:  # esc to exit
            break

        if ord('0') <= key <= ord('7'):
            finger = key - ord('0')

finally:
    # Release the camera and close the window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Entangled 3D joints in webcam quick test #13

Entangled 3D joints in webcam quick test #13

valillon commented Mar 6, 2020

Entangled 3D joints in webcam quick test #13

Entangled 3D joints in webcam quick test #13

Comments

valillon commented Mar 6, 2020