Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Entangled 3D joints in webcam quick test #13

Open
valillon opened this issue Mar 6, 2020 · 0 comments
Open

Entangled 3D joints in webcam quick test #13

valillon opened this issue Mar 6, 2020 · 0 comments

Comments

@valillon
Copy link

valillon commented Mar 6, 2020

Hi @boukhayma ,

First thing, thank you for your work and releasing the code.
I adapted it to run on a live webcam feed.

output

In the example above, while OpenPose seems to perform pretty decently, the 3D hand estimation tends to entangle some joints, especially the thumb joints, which look quite rigid. Have you ever observed this behavior before?

Attaching also the test code, I might be doing something wrong here.
BTW, I'm especially curious why the joints are arranged in triplets with no apparent relation to the fingers.

Best

from __future__ import division
import torch
from torch.autograd import Variable
from model import resnet34_Mano
# from torch.utils import data
from torchvision.transforms import ToTensor
from PIL import Image
import numpy as np
import time
import cv2
import os
import PyOpenPose as OP


# -------------------------------------------------------------------------------
# Capture

# Open capture device 0 and grab one frame up front so the frame geometry is
# known before anything else is configured.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    raise RuntimeError("Could not open video capture device 0")

ret, frame = cap.read()
if not ret or frame is None:
    # Fail with a clear message instead of an AttributeError on `None.shape`,
    # and give the device back before bailing out.
    cap.release()
    raise RuntimeError("Could not read an initial frame from capture device 0")

imgSize = list(frame.shape)     # frame.shape as a list: [height, width, channels]
outSize = imgSize[1::-1]        # first two entries reversed: [width, height]
target_size = 240               # side length (pixels) of the square image used downstream

# -------------------------------------------------------------------------------
# OpenPose

# Location of the OpenPose installation; raises KeyError if the environment
# variable is not set.
OPENPOSE_ROOT = os.environ["OPENPOSE_ROOT"]
# "<root><sep>models<sep>" -- the trailing separator is part of the string.
openpose_models_dir = os.sep.join((OPENPOSE_ROOT, "models", ""))

# Seven triplets of hand heat-map channel indices (together a permutation of
# 0..20). The main loop packs each triplet into one 3-channel image.
hand_indexes = [
    [0, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
    [17, 18, 19],
    [20, 13, 14],
    [15, 16, 1],
    [2, 3, 4],
]

# OpenPose works on the same square resolution as the cropped webcam image.
input_size = output_size = (target_size, target_size)

download_heatmaps = True
with_face, with_hands = False, True

# Hand bounding box covering the whole square image: [0, 0, size, size].
handBB = [0, 0] + [target_size] * 2

# Two candidate network resolutions; only the large one is passed below.
netres_medium = (656, 368)
netres_large = (1312, 736)

op = OP.OpenPose(
    netres_large,
    input_size,
    output_size,
    "COCO",
    openpose_models_dir,
    0,
    download_heatmaps,
    OP.OpenPose.ScaleMode.ZeroToOne,
    with_face,
    with_hands,
)

# -------------------------------------------------------------------------------

# Drawing colour for each of the 21 joints, as a list of three ints per joint.
# Entry 0 is the wrist (black). It is followed by four entries per finger,
# base joint first, in the order: index, heart, little, ring, thumb
# ("heart" presumably means the middle finger -- TODO confirm).
# Within a finger the non-saturated channels step through 0, 60, 120, 180
# while the saturated channels stay at 255.
_JOINT_SHADES = (0, 60, 120, 180)
_FINGER_SATURATED_CHANNELS = (
    (False, True, True),     # index
    (False, True, False),    # heart
    (True, False, True),     # little
    (True, False, False),    # ring
    (False, False, True),    # thumb
)

joint_colors = [[0, 0, 0]]   # wrist
for _saturated in _FINGER_SATURATED_CHANNELS:
    for _shade in _JOINT_SHADES:
        joint_colors.append([255 if _is_on else _shade for _is_on in _saturated])


# -------------------------------------------------------------------------------

# Network input mode: 0 feeds the image alone, 1 feeds the image together
# with the joint heat maps. The value also selects the weights file below.
input_option = 1

# template = open('data/template.obj')
# content = template.readlines()
# template.close()


# -------------------------------------------------------------------------------
# Model

# Weights file is chosen by input mode, e.g. 'data/model-1.pth'.
weights_path = 'data/model-%d.pth' % input_option

# The network is wrapped in DataParallel before its weights are loaded, then
# switched to evaluation mode.
model = torch.nn.DataParallel(resnet34_Mano(input_option=input_option))
state_dict = torch.load(weights_path)
model.load_state_dict(state_dict)
model.eval()

# -------------------------------------------------------------------------------
# Main Loop

# Interactive state.
finger = 0                      # heat-map group shown in the middle panel: 0 = all, 1..7 = one triplet
actual_fps = 0
paused = False                  # never toggled in this script, so waitKey always gets 1 ms
delay = {True: 0, False: 1}     # cv2.waitKey argument for each pause state

to_tensor = ToTensor()          # built once and reused every frame

try:
    while True:

        start_time = time.time()

        # Capture: stop cleanly when the camera stops delivering frames.
        ret, frame = cap.read()
        if not ret or frame is None:
            print("Failed to grab frame")
            break
        frame = cv2.flip(frame, 1)

        # Formatting: centred square crop (assumes width >= height), then
        # resize to the working resolution. The interpolation flag must be
        # passed by keyword: the third positional parameter of cv2.resize
        # is `dst`, not `interpolation`.
        h, w, _ = frame.shape
        offset = int((w - h) * 0.5)
        bgr = frame[:h, offset:offset + h]
        bgr = cv2.resize(bgr, (target_size, target_size),
                         interpolation=cv2.INTER_LINEAR)

        # 2D joint detection
        t = time.time()
        op.detectHands(bgr, np.array(handBB + [0, 0, 0, 0], dtype=np.int32).reshape((1, 8)))
        t = time.time() - t
        op_fps = 1.0 / t if t > 0 else 0.0     # guard against a zero timer delta

        res = op.render(bgr)
        cv2.putText(res, 'OpenPose Fps = %0.1f' % op_fps, (20, 20), 0, 0.5, (255, 255, 255))

        heatmap = np.zeros(bgr.shape)           # float accumulator for the heat-map panel
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        inputs = [to_tensor(rgb)]

        if download_heatmaps:

            _, right_hands = op.getHandHeatmaps()
            hands = right_hands

            for j, triplet in enumerate(hand_indexes):

                # Column-reversed views of the three heat maps of this triplet.
                mirrored = [hands[0, idx, :, ::-1] for idx in triplet]

                # Accumulate into the display panel, one heat map per colour channel.
                if finger == 0 or j + 1 == finger:
                    for channel, hm in enumerate(mirrored):
                        heatmap[:, :, channel] += hm * 255

                # The network receives the maps flipped back again, packed as
                # one 3-channel uint8 image per triplet.
                planes = [np.fliplr(hm * 255).astype(np.uint8) for hm in mirrored]
                handmap = np.stack(planes, axis=2)
                # handmap = cv2.resize(handmap, output_size, interpolation=cv2.INTER_LINEAR)   # Same as input image
                inputs.append(to_tensor(handmap))

        inputs = torch.cat(inputs, dim=0)

        # 3D hand detection
        out1, out2 = model(Variable(inputs.cuda()).unsqueeze(0))

        # print(out1.shape)
        # print(out2.shape)

        # Copy the prediction to host memory once instead of reading 42
        # scalars one at a time from the GPU tensor.
        joints_2d = out1.data.cpu().numpy()

        # Display 2D joints on a white canvas; out1[0] is read as
        # interleaved (u, v) pairs, one pair per joint.
        canvas = np.full_like(bgr, 255)
        for i in range(21):
            u = int(joints_2d[0, 2 * i])
            v = int(joints_2d[0, 2 * i + 1])
            cv2.circle(canvas, (u, v), radius=2, color=joint_colors[i], thickness=2)

        elapsed = time.time() - start_time
        actual_fps = 1.0 / elapsed if elapsed > 0 else 0.0
        cv2.putText(canvas, 'Total Fps = %0.1f' % actual_fps, (20, 20), 0, 0.5, (0, 0, 0))

        # Clip before the uint8 cast: summed heat maps can exceed 255 and
        # would otherwise wrap around to dark values.
        heatmap_img = np.clip(heatmap, 0, 255).astype(np.uint8)
        cv2.putText(heatmap_img, 'Finger = %d' % finger, (20, 20), 0, 0.5, (255, 255, 255))

        # Side-by-side panels: OpenPose render | heat maps | predicted joints.
        composite = np.concatenate((res, heatmap_img, canvas), axis=1)
        cv2.imshow('3D Hand Pose', composite)

        # Save 3D mesh
        # file1 = open('data/out/'+str(i)+'.obj','w')
        # for j in xrange(778):
        #     file1.write("v %f %f %f\n"%(out2[0,21+j,0],-out2[0,21+j,1],-out2[0,21+j,2]))
        # for j,x in enumerate(content):
        #     a = x[:len(x)-1].split(" ")
        #     if (a[0] == 'f'):
        #         file1.write(x)
        # file1.close()

        # Mask once so the Esc and digit checks see the same key code.
        key = cv2.waitKey(delay[paused]) & 0xFF
        if key == 27:  # esc to exit
            break

        if ord('0') <= key <= ord('7'):
            finger = key - ord('0')

finally:
    # Always release the camera and close the window, however the loop ended.
    cap.release()
    cv2.destroyAllWindows()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant