
Deploy yolov8 on OAK-D #519

Open
NQHuy1905 opened this issue Feb 16, 2024 · 6 comments

Comments

@NQHuy1905

Hi, I am trying to deploy YOLOv8-pose on an OAK-D.

I have seen some tutorials, but they cover older versions of YOLO.

How should I go about this?

@Erol444
Member

Erol444 commented Feb 16, 2024

Hi @NQHuy1905 ,
I'd follow this tutorial: https://docs.luxonis.com/en/latest/pages/ai_ml_nn/
You'd likely need to use the default NeuralNetwork node and then do the full decoding yourself, as we don't have on-device decoding support for YOLO pose architectures.
Thanks, Erik
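A minimal sketch of that approach (not from the linked tutorial): run the blob through a plain NeuralNetwork node and pull the raw output tensor to the host for decoding. The blob filename, input size, and output shape below are assumptions about a typical yolov8n-pose export, not confirmed values.

```python
import depthai as dai
import numpy as np

pipeline = dai.Pipeline()

camRgb = pipeline.create(dai.node.ColorCamera)
camRgb.setPreviewSize(640, 640)   # must match the blob's input resolution
camRgb.setInterleaved(False)

# Generic NeuralNetwork node: no on-device YOLO decoding, just raw inference
nn = pipeline.create(dai.node.NeuralNetwork)
nn.setBlobPath("yolov8n-pose_openvino_2022.1_6shave.blob")  # assumed path
camRgb.preview.link(nn.input)

xout = pipeline.create(dai.node.XLinkOut)
xout.setStreamName("nn")
nn.out.link(xout.input)

with dai.Device(pipeline) as device:
    q = device.getOutputQueue("nn", maxSize=4, blocking=False)
    while True:
        data = q.get()
        # yolov8-pose exports typically produce one (1, 56, 8400) tensor:
        # 4 box values + 1 confidence + 17*3 keypoint values per prediction.
        raw = np.array(data.getFirstLayerFp16())
        # ... reshape, decode boxes/keypoints, and run NMS on the host here ...
```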

@NQHuy1905
Author

NQHuy1905 commented Feb 19, 2024

@Erol444 I see, but in the post-processing I use non_max_suppression, which is built on torchvision. Can the OAK also use the torch library?

@Erol444
Member

Erol444 commented Feb 19, 2024

Hi @NQHuy1905 ,
For YOLO detection models we run NMS directly on the device. For a model like this, I would remove the bottom layers (NMS), compile the model, run it on the device, and then perform the removed bottom layers (NMS) on the host itself. This would be the most straightforward path. Thoughts?
Thanks, Erik
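A rough sketch of the host-side part of that path, assuming the exported yolov8n-pose head yields a (1, 56, 8400) tensor (4 box values + 1 confidence + 17×3 keypoint values per prediction); the shapes, slicing, and thresholds here are assumptions about the export, not confirmed values.

```python
import numpy as np
import torch
from torchvision.ops import nms

def postprocess(raw, conf_thres=0.5, iou_thres=0.5):
    # raw: flat fp16 output pulled from the device, assumed to reshape to (56, 8400)
    pred = np.asarray(raw, dtype=np.float32).reshape(56, -1).T  # (8400, 56)
    boxes_xywh = pred[:, :4]   # cx, cy, w, h in input-image pixels
    scores = pred[:, 4]        # person confidence
    kpts = pred[:, 5:]         # 17 keypoints * (x, y, conf)

    keep = scores > conf_thres
    boxes_xywh, scores, kpts = boxes_xywh[keep], scores[keep], kpts[keep]

    # Convert cx,cy,w,h -> x1,y1,x2,y2 for torchvision's NMS
    boxes = np.empty_like(boxes_xywh)
    boxes[:, 0] = boxes_xywh[:, 0] - boxes_xywh[:, 2] / 2
    boxes[:, 1] = boxes_xywh[:, 1] - boxes_xywh[:, 3] / 2
    boxes[:, 2] = boxes_xywh[:, 0] + boxes_xywh[:, 2] / 2
    boxes[:, 3] = boxes_xywh[:, 1] + boxes_xywh[:, 3] / 2

    idx = nms(torch.from_numpy(boxes), torch.from_numpy(scores), iou_thres).numpy()
    return boxes[idx], scores[idx], kpts[idx]
```

Since NMS runs on the host in this setup, torch/torchvision is only a host-side dependency; the device just executes the compiled blob.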

@NQHuy1905
Author

@Erol444 So the latency of preprocessing and inference will depend on the OAK, and the latency of post-processing like NMS will depend on the host device's hardware, right?

@Erol444
Member

Erol444 commented Feb 19, 2024

Hi @NQHuy1905 ,
Yes, that is correct. I believe NMS (mostly) runs on the device's CPU anyway, rather than on the vector cores ("GPUs"), so running it on the host wouldn't hurt performance.

@Julhio

Julhio commented May 5, 2024

Hello. I tried to implement YOLOv8-Pose based on the "Spatial Tiny-yolo" example, but I couldn't get the keypoints. I converted the model to "yolov8n-pose_openvino_2022.1_6shave.blob" and tried many times, but never got the keypoints. I would appreciate it if someone could help me. The code is below:

```python
#!/usr/bin/env python3

from pathlib import Path
import sys
import cv2
import depthai as dai
import numpy as np
import time

arg = ""

def getPath(path):
    return str((Path(__file__).parent / Path(path)).resolve().absolute())

nnBlobPath = getPath('models/yolov8n-pose_openvino_2022.1_6shave.blob')

if not Path(nnBlobPath).exists():
    import sys
    raise FileNotFoundError(f'Required file/s not found, please run "{sys.executable} install_requirements.py"')

labelMapPose = [
    "head", "neck", "right_shoulder", "right_elbow", "right_wrist",
    "left_shoulder", "left_elbow", "left_wrist", "right_hip", "right_knee",
    "right_ankle", "left_hip", "left_knee", "left_ankle", "right_eye",
    "left_eye", "right_ear", "left_ear"
]

syncNN = True
pipeline = dai.Pipeline()
camRgb = pipeline.create(dai.node.ColorCamera)
spatialDetectionNetwork = pipeline.create(dai.node.YoloSpatialDetectionNetwork)
monoLeft = pipeline.create(dai.node.MonoCamera)
monoRight = pipeline.create(dai.node.MonoCamera)
stereo = pipeline.create(dai.node.StereoDepth)
nnNetworkOut = pipeline.create(dai.node.XLinkOut)

xoutRgb = pipeline.create(dai.node.XLinkOut)
xoutNN = pipeline.create(dai.node.XLinkOut)
xoutDepth = pipeline.create(dai.node.XLinkOut)

xoutRgb.setStreamName("rgb")
xoutNN.setStreamName("detections")
xoutDepth.setStreamName("depth")
nnNetworkOut.setStreamName("nnNetwork")

camRgb.setPreviewSize(640, 640)
print("640, 640")
spatialDetectionNetwork.setAnchorMasks({
    "side26": [1,2,3],
    "side13": [3,4,5],
    "side8400": [6,7,8]  # Define anchor masks for side8400
})

camRgb.setResolution(dai.ColorCameraProperties.SensorResolution.THE_1080_P)
camRgb.setInterleaved(False)
camRgb.setColorOrder(dai.ColorCameraProperties.ColorOrder.BGR)

monoLeft.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoLeft.setCamera("left")
monoRight.setResolution(dai.MonoCameraProperties.SensorResolution.THE_400_P)
monoRight.setCamera("right")

stereo.setDefaultProfilePreset(dai.node.StereoDepth.PresetMode.HIGH_DENSITY)
stereo.setDepthAlign(dai.CameraBoardSocket.CAM_A)
stereo.setOutputSize(monoLeft.getResolutionWidth(), monoLeft.getResolutionHeight())
stereo.setSubpixel(True)

spatialDetectionNetwork.setBlobPath(nnBlobPath)
spatialDetectionNetwork.setConfidenceThreshold(0.5)
spatialDetectionNetwork.input.setBlocking(False)
spatialDetectionNetwork.setBoundingBoxScaleFactor(0.5)
spatialDetectionNetwork.setDepthLowerThreshold(100)
spatialDetectionNetwork.setDepthUpperThreshold(5000)

spatialDetectionNetwork.setNumClasses(80)
spatialDetectionNetwork.setCoordinateSize(4)
spatialDetectionNetwork.setAnchors([10,14, 23,27, 37,58, 81,82, 135,169, 344,319])
spatialDetectionNetwork.setIouThreshold(0.5)

monoLeft.out.link(stereo.left)
monoRight.out.link(stereo.right)

camRgb.preview.link(spatialDetectionNetwork.input)
if syncNN:
spatialDetectionNetwork.passthrough.link(xoutRgb.input)
else:
camRgb.preview.link(xoutRgb.input)

spatialDetectionNetwork.out.link(xoutNN.input)

stereo.depth.link(spatialDetectionNetwork.inputDepth)
spatialDetectionNetwork.passthroughDepth.link(xoutDepth.input)
spatialDetectionNetwork.outNetwork.link(nnNetworkOut.input)

with dai.Device(pipeline) as device:
    previewQueue = device.getOutputQueue(name="rgb", maxSize=4, blocking=False)
    detectionNNQueue = device.getOutputQueue(name="detections", maxSize=4, blocking=False)
    depthQueue = device.getOutputQueue(name="depth", maxSize=4, blocking=False)
    networkQueue = device.getOutputQueue(name="nnNetwork", maxSize=4, blocking=False)

    startTime = time.monotonic()
    counter = 0
    fps = 0
    color = (255, 255, 255)
    printOutputLayersOnce = True

    while True:
        inPreview = previewQueue.get()
        inDet = detectionNNQueue.get()
        depth = depthQueue.get()
        inNN = networkQueue.get()

        if printOutputLayersOnce:
            toPrint = 'Output layer names:'
            for ten in inNN.getAllLayerNames():
                toPrint = f'{toPrint} {ten},'
            print(toPrint)
            printOutputLayersOnce = False

        frame = inPreview.getCvFrame()
        depthFrame = depth.getFrame()  # depthFrame values are in millimeters

        depth_downscaled = depthFrame[::4]
        if np.all(depth_downscaled == 0):
            min_depth = 0  # Set a default minimum depth value when all elements are zero
        else:
            min_depth = np.percentile(depth_downscaled[depth_downscaled != 0], 1)
        max_depth = np.percentile(depth_downscaled, 99)
        depthFrameColor = np.interp(depthFrame, (min_depth, max_depth), (0, 255)).astype(np.uint8)
        depthFrameColor = cv2.applyColorMap(depthFrameColor, cv2.COLORMAP_HOT)

        counter += 1
        current_time = time.monotonic()
        if (current_time - startTime) > 1:
            fps = counter / (current_time - startTime)
            counter = 0
            startTime = current_time

        detections = inDet.detections

        height = frame.shape[0]
        width = frame.shape[1]

        print(detections)

        for detection in detections:
            for keypoint in detection.keypoints.xy[0]:
                x, y = keypoint[0].item(), keypoint[1].item()
                cv2.circle(frame, (int(x), int(y)), 5, (0, 0, 255), -1)  # Red circle for keypoints

        cv2.putText(frame, "NN fps: {:.2f}".format(fps), (2, frame.shape[0] - 4), cv2.FONT_HERSHEY_TRIPLEX, 0.4, color)
        cv2.imshow("depth", depthFrameColor)
        cv2.imshow("rgb", frame)

        if cv2.waitKey(1) == ord('q'):
            break
```
