Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor encoder-related code into EncoderComponent #1130

Open
wants to merge 27 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
0cb3d78
Add get_fps method to StereoComponent
geektrove Oct 12, 2023
9d2b406
Save colormap_manip as instance variable in StereoComponent
geektrove Oct 12, 2023
e736c02
Add EncoderComponent
geektrove Oct 13, 2023
92a4aec
Remove config_encoder methods from CameraComponent
geektrove Oct 13, 2023
59a8a29
Connect node out to encoder
geektrove Oct 13, 2023
b6f5356
Remove duplicated _replay assignment
geektrove Oct 13, 2023
528d5fd
Rename _encoder with node
geektrove Oct 16, 2023
87a62a9
Fix style
geektrove Oct 16, 2023
fd1963a
Ensure encoder compatible size when creating EncoderComponent
geektrove Oct 16, 2023
0a04473
Implement component outputs for EncoderComponent
geektrove Oct 16, 2023
9f0a963
Remove encoded output from CameraComponent
geektrove Oct 16, 2023
2897c8b
Remove encoded output from NNComponent
geektrove Oct 16, 2023
875aaad
Remove encoder from StereoComponent
geektrove Oct 16, 2023
0c40188
Remove unused device and pipeline from instance variables
geektrove Oct 17, 2023
15a2185
Add EncoderComponent to components.__init__
geektrove Oct 17, 2023
3f46e41
Remove encode parameter from create_camera and create_stereo
geektrove Oct 17, 2023
3e85779
Add create_encoder method
geektrove Oct 17, 2023
44b1f96
Set compatible ISP and video size only for ColorCamera
geektrove Oct 17, 2023
53966b6
Add ensure_encoder_compatible_size to StereoComponent
geektrove Oct 17, 2023
e1d8a35
Fix OakCamera method ordering
geektrove Oct 17, 2023
a4b227f
Remove encode from all_cameras, use camera method instead
geektrove Oct 17, 2023
3a5f3bf
Remove unused imports
geektrove Oct 22, 2023
f957de0
Handle replay input (XLinkIn)
geektrove Oct 22, 2023
ea2b377
Format EncoderComponent code
geektrove Oct 22, 2023
c5923b5
Ensure NV12 format when using replay
geektrove Oct 22, 2023
ef08db0
Fix setup.py extra requirements
geektrove Oct 23, 2023
aee9634
Update most of the examples for compatibility with EncoderComponent
geektrove Oct 23, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions depthai_sdk/examples/CameraComponent/camera_encode.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from depthai_sdk import OakCamera

with OakCamera() as oak:
color = oak.create_camera('color', encode='h265')
color = oak.create_camera('color')
encoder = oak.create_encoder(color, codec='h265')

oak.visualize(color.out.encoded, fps=True, scale=2/3)
oak.visualize(encoder, fps=True, scale=2/3)
# By default, it will stream non-encoded frames
oak.visualize(color, fps=True, scale=2/3)
oak.start(blocking=True)
9 changes: 4 additions & 5 deletions depthai_sdk/examples/StereoComponent/stereo_encoded.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@


with OakCamera() as oak:
stereo = oak.create_stereo('800p', fps=30, encode='h264')
stereo = oak.create_stereo('800p', fps=30)
stereo.set_colormap(dai.Colormap.JET) # Must be set before creating the encoder
encoder = oak.create_encoder(stereo, codec='h264')

# Set on-device output colorization, works only for encoded output
stereo.set_colormap(dai.Colormap.JET)

oak.visualize(stereo.out.encoded, fps=True)
oak.visualize(encoder, fps=True)
oak.start(blocking=True)
2 changes: 1 addition & 1 deletion depthai_sdk/examples/mixed/car_tracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Download public depthai-recording
with OakCamera(replay='cars-tracking-above-01') as oak:
# Create color camera, add video encoder
# Create color camera
color = oak.create_camera('color')

# Download & run pretrained vehicle detection model and track detections
Expand Down
6 changes: 3 additions & 3 deletions depthai_sdk/examples/mixed/sync_multiple_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
from depthai_sdk import OakCamera

with OakCamera() as oak:
color = oak.create_camera('color', encode='h264')
color = oak.create_camera('color')
encoder = oak.create_encoder(color, codec='h264')
nn = oak.create_nn('mobilenet-ssd', color)
nn2 = oak.create_nn('face-detection-retail-0004', color)

Expand All @@ -12,8 +13,7 @@ def cb(msgs: Dict):
for name, packet in msgs.items():
print(f"Packet '{name}' with timestamp:", packet.get_timestamp(), 'Seq number:', packet.get_sequence_num(), 'Object', packet)

oak.callback([nn.out.passthrough, nn.out.encoded, nn2.out.encoded], cb) \
oak.callback([nn.out.passthrough, encoder], cb) \
.configure_syncing(enable_sync=True, threshold_ms=30)
# oak.show_graph()

oak.start(blocking=True)
14 changes: 9 additions & 5 deletions depthai_sdk/examples/recording/encode.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,21 @@
from depthai_sdk import OakCamera, RecordType

with OakCamera() as oak:
color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
color = oak.create_camera('color', resolution='1080P', fps=10)
left = oak.create_camera('left', resolution='800p', fps=10)
right = oak.create_camera('right', resolution='800p', fps=10)

color_encoder = oak.create_encoder(color, codec='h265')
left_encoder = oak.create_encoder(left, codec='h265')
right_encoder = oak.create_encoder(right, codec='h265')

stereo = oak.create_stereo(left=left, right=right)
nn = oak.create_nn('mobilenet-ssd', color, spatial=stereo)

# Sync & save all (encoded) streams
oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record', RecordType.VIDEO) \
oak.record([color_encoder, left_encoder, right_encoder], './record', RecordType.VIDEO) \
.configure_syncing(enable_sync=True, threshold_ms=50)

oak.visualize([color.out.encoded], fps=True)
oak.visualize([color_encoder], fps=True)

oak.start(blocking=True)
9 changes: 5 additions & 4 deletions depthai_sdk/examples/recording/encoder_preview.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@

rec = AvWriter(Path('./'), 'color', fourcc=fourcc)

def save_raw_mjpeg(packet):
def save_raw(packet):
rec.write(packet.msg)

with OakCamera() as oak:
color = oak.create_camera('color', encode=fourcc, fps=20)
color = oak.create_camera('color', fps=20)
encoder = oak.create_encoder(color, codec=fourcc)

# Stream encoded video packets to host. For visualization, we decode them
# on the host side, and for callback we write encoded frames directly to disk.
oak.visualize(color.out.encoded, scale=2 / 3, fps=True)
oak.callback(color.out.encoded, callback=save_raw_mjpeg)
oak.visualize(encoder, scale=2 / 3, fps=True)
oak.callback(encoder, callback=save_raw)

oak.start(blocking=True)

Expand Down
13 changes: 8 additions & 5 deletions depthai_sdk/examples/recording/recording_duration.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,18 @@
import time

with OakCamera() as oak:
color = oak.create_camera('color', resolution='1080P', fps=10, encode='H265')
left = oak.create_camera('left', resolution='800p', fps=10, encode='H265')
right = oak.create_camera('right', resolution='800p', fps=10, encode='H265')
color = oak.create_camera('color', resolution='1080P', fps=10)
left = oak.create_camera('left', resolution='800p', fps=10)
right = oak.create_camera('right', resolution='800p', fps=10)
color_encoder = oak.create_encoder(color, codec='h265')
left_encoder = oak.create_encoder(left, codec='h265')
right_encoder = oak.create_encoder(right, codec='h265')

# Sync & save all (encoded) streams
oak.record([color.out.encoded, left.out.encoded, right.out.encoded], './record')
oak.record([color_encoder, left_encoder, right_encoder], './record')
oak.start()
start_time = time.monotonic()
while oak.running():
if time.monotonic() - start_time > 5:
break
oak.poll()
oak.poll()
13 changes: 8 additions & 5 deletions depthai_sdk/examples/recording/rosbag_record.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
from depthai_sdk import OakCamera, RecordType

with OakCamera() as oak:
color = oak.create_camera('color', encode='jpeg', fps=30)
left = oak.create_camera('left', resolution='800p', encode='jpeg', fps=30)
right = oak.create_camera('right', resolution='800p', encode='jpeg', fps=30)
color = oak.create_camera('color', fps=30)
left = oak.create_camera('left', resolution='800p', fps=30)
right = oak.create_camera('right', resolution='800p', fps=30)
color_encoder = oak.create_encoder(color, codec='mjpeg')
left_encoder = oak.create_encoder(left, codec='mjpeg')
right_encoder = oak.create_encoder(right, codec='mjpeg')
stereo = oak.create_stereo(left=left, right=right)
stereo.config_stereo(align=color)
imu = oak.create_imu()
imu.config_imu(report_rate=400, batch_report_threshold=5)

# DB3 / ROSBAG. ROSBAG doesn't require having ROS installed, while DB3 does.
record_components = [left.out.encoded, color.out.encoded, right.out.encoded, stereo.out.depth, imu]
record_components = [left_encoder, color_encoder, right_encoder, stereo.out.depth, imu]
oak.record(record_components, 'record', record_type=RecordType.ROSBAG)

# Visualize only color stream
oak.visualize(color.out.encoded)
oak.visualize(color_encoder)
oak.start(blocking=True)
5 changes: 3 additions & 2 deletions depthai_sdk/examples/trigger_action/person_record.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
from depthai_sdk.trigger_action.triggers.detection_trigger import DetectionTrigger

with OakCamera() as oak:
color = oak.create_camera('color', encode='jpeg')
color = oak.create_camera('color')
color_encoder = oak.create_encoder(color, codec='mjpeg')
stereo = oak.create_stereo('400p')

nn = oak.create_nn('mobilenet-ssd', color)

trigger = DetectionTrigger(input=nn, min_detections={'person': 1}, cooldown=30)
action = RecordAction(inputs=[color, stereo.out.disparity], dir_path='./recordings/',
action = RecordAction(inputs=[color_encoder, stereo.out.disparity], dir_path='./record/',
duration_before_trigger=5, duration_after_trigger=10)
oak.trigger_action(trigger=trigger, action=action)

Expand Down
3 changes: 2 additions & 1 deletion depthai_sdk/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@
'matplotlib==3.5.3; python_version <= "3.7"',
'matplotlib==3.6.1; python_version > "3.7"'],
"replay": ['mcap>=0.0.10',
'mcap-protobuf-support==0.0.4',
'mcap-ros1-support==0.0.8',
'rosbags==0.9.11'],
"record": ['av'],
"record": ['av<9'],
"test": ['pytest']
},
project_urls={
Expand Down
1 change: 1 addition & 0 deletions depthai_sdk/src/depthai_sdk/components/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .component import Component
from .camera_component import CameraComponent
from .encoder_component import EncoderComponent
from .nn_component import NNComponent
from .stereo_component import StereoComponent
from .imu_component import IMUComponent
101 changes: 20 additions & 81 deletions depthai_sdk/src/depthai_sdk/components/camera_component.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging
from typing import Dict
from typing import Dict, List, Optional, Union

from depthai_sdk.classes.enum import ResizeMode
from depthai_sdk.components.camera_control import CameraControl
from depthai_sdk.components.camera_helper import *
from depthai_sdk.components.component import Component, ComponentOutput
from depthai_sdk.components.parser import parse_resolution, parse_encode, encoder_profile_to_fourcc
from depthai_sdk.components.parser import parse_resolution
from depthai_sdk.oak_outputs.xout.xout_base import XoutBase, StreamXout, ReplayStream
from depthai_sdk.oak_outputs.xout.xout_frames import XoutFrames
from depthai_sdk.replay import Replay
Expand All @@ -21,7 +21,6 @@ def __init__(self,
str, dai.ColorCameraProperties.SensorResolution, dai.MonoCameraProperties.SensorResolution
]] = None,
fps: Optional[float] = None,
encode: Union[None, str, bool, dai.VideoEncoderProperties.Profile] = None,
sensor_type: Optional[dai.CameraSensorType] = None,
rotation: Optional[int] = None,
replay: Optional[Replay] = None,
Expand All @@ -38,7 +37,6 @@ def __init__(self,
source (str or dai.CameraBoardSocket): Source of the camera. Either color/rgb/right/left
resolution (optional): Camera resolution, eg. '800p' or '4k'
fps (float, optional): Camera FPS
encode: Encode streams before sending them to the host. Either True (use default), or mjpeg/h264/h265
sensor_type: To force color/mono/tof camera
rotation (int, optional): Rotate the camera by 90, 180, 270 degrees
replay (Replay object): Replay object to use for mocking the camera
Expand All @@ -54,7 +52,6 @@ def __init__(self,
self._device = device

self.node: Optional[Union[dai.node.ColorCamera, dai.node.MonoCamera, dai.node.XLinkIn]] = None
self.encoder: Optional[dai.node.VideoEncoder] = None

self.stream: Optional[dai.Node.Output] = None # Node output to be used as eg. an input into NN
self.stream_size: Optional[Tuple[int, int]] = None # Output size
Expand All @@ -64,7 +61,6 @@ def __init__(self,
self._source = self._source[len('CameraBoardSocket.'):]

self._socket = source
self._replay: Optional[Replay] = replay
self._args: Dict = args

self.name = name
Expand Down Expand Up @@ -151,12 +147,11 @@ def __init__(self,
targetWidthIsp = targetWidthRes
res = getClosesResolution(sensor, sensor_type, width=targetWidthRes)
self.node.setResolution(res)
scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp,
videoEncoder=(encode is not None))
scale = getClosestIspScale(self.node.getIspSize(), width=targetWidthIsp)
self.node.setIspScale(*scale)

curr_size = self.node.getVideoSize()
closest = getClosestVideoSize(*curr_size, videoEncoder=encode)
closest = getClosestVideoSize(*curr_size)
self.node.setVideoSize(*closest)
self.node.setVideoNumFramesPool(2) # We will increase it later if we are streaming to host

Expand All @@ -177,26 +172,6 @@ def __init__(self,
self.stream = rot_manip.out
self.stream_size = self.stream_size[::-1]

if encode:
self.encoder = pipeline.createVideoEncoder()
self._encoder_profile = parse_encode(encode) # MJPEG by default
self.encoder.setDefaultProfilePreset(self.get_fps(), self._encoder_profile)

if self.is_replay(): # TODO - this might be not needed, we check for replay above and return
# Create ImageManip to convert to NV12
type_manip = pipeline.createImageManip()
type_manip.setFrameType(dai.ImgFrame.Type.NV12)
type_manip.setMaxOutputFrameSize(self.stream_size[0] * self.stream_size[1] * 3)

self.stream.link(type_manip.inputImage)
type_manip.out.link(self.encoder.input)
elif self.is_mono():
self.stream.link(self.encoder.input)
elif self.is_color():
self.node.video.link(self.encoder.input)
else:
raise ValueError('CameraComponent is neither Color, Mono, nor Replay!')

if self._args:
self._config_camera_args(self._args)

Expand All @@ -210,6 +185,18 @@ def __init__(self,
# CameraControl message doesn't use any additional data (only metadata)
self._control_xlink_in.setMaxDataSize(1)

def ensure_encoder_compatible_size(self) -> None:
    """Re-apply the camera's ISP scale and video size with ``videoEncoder=True``.

    Only acts when ``self.is_color()`` is true; for any other camera type the
    method returns without touching the node.

    NOTE(review): ``getClosestIspScale`` / ``getClosestVideoSize`` are defined
    outside this block; presumably they snap the current sizes to values the
    video encoder accepts - confirm against camera_helper.
    """
    if not self.is_color():
        return

    camera = self.node

    # The ISP scale is derived from the current ISP size and width.
    isp_scale = getClosestIspScale(
        camera.getIspSize(),
        width=camera.getIspWidth(),
        videoEncoder=True,
    )
    camera.setIspScale(*isp_scale)

    # The video size is read only after the ISP scale has been applied.
    video_size = getClosestVideoSize(*camera.getVideoSize(), videoEncoder=True)
    camera.setVideoSize(*video_size)

def on_pipeline_started(self, device: dai.Device):
if self._control_xlink_in is not None:
queue = device.getInputQueue(self._control_xlink_in.getStreamName())
Expand Down Expand Up @@ -402,46 +389,8 @@ def set_fps(self, fps: float):
else:
self.node.setFps(fps)

def config_encoder_h26x(self,
                        rate_control_mode: Optional[dai.VideoEncoderProperties.RateControlMode] = None,
                        keyframe_freq: Optional[int] = None,
                        bitrate_kbps: Optional[int] = None,
                        num_b_frames: Optional[int] = None,
                        ):
    """Configure H.26x-specific settings on the component's video encoder.

    Every argument is optional; an argument left as None is not forwarded to
    the encoder.

    Args:
        rate_control_mode: Forwarded to ``setRateControlMode``.
        keyframe_freq: Forwarded to ``setKeyframeFrequency``.
        bitrate_kbps: Forwarded to ``setBitrateKbps``.
        num_b_frames: Forwarded to ``setNumBFrames``.

    Raises:
        Exception: If no encoder exists, or the encoder profile is MJPEG.
    """
    encoder = self.encoder
    if encoder is None:
        raise Exception('Video encoder was not enabled!')
    if self._encoder_profile == dai.VideoEncoderProperties.Profile.MJPEG:
        raise Exception('Video encoder was set to MJPEG while trying to configure H26X attributes!')

    # (setter name, requested value) pairs, applied in declaration order.
    requested_settings = (
        ('setRateControlMode', rate_control_mode),
        ('setKeyframeFrequency', keyframe_freq),
        ('setBitrateKbps', bitrate_kbps),
        ('setNumBFrames', num_b_frames),
    )
    for setter_name, value in requested_settings:
        if value is None:
            continue
        getattr(encoder, setter_name)(value)

def config_encoder_mjpeg(self,
                         quality: Optional[int] = None,
                         lossless: Optional[bool] = None
                         ):
    """Configure MJPEG-specific settings on the component's video encoder.

    Args:
        quality: Forwarded to ``setQuality``; the setting is left untouched
            when None.
        lossless: Forwarded to ``setLossless``; the setting is left untouched
            when None.

    Raises:
        Exception: If no encoder exists, or the encoder profile is not MJPEG.
    """
    if self.encoder is None:
        raise Exception('Video encoder was not enabled!')
    if self._encoder_profile != dai.VideoEncoderProperties.Profile.MJPEG:
        raise Exception(
            f'Video encoder was set to {self._encoder_profile} while trying to configure MJPEG attributes!'
        )

    if quality is not None:
        self.encoder.setQuality(quality)
    # `lossless` used to default to False, which made this guard always true:
    # a call that only changed `quality` also reset lossless to False. With a
    # None default the encoder is only touched when the caller asks for it.
    if lossless is not None:
        self.encoder.setLossless(lossless)

def get_stream_xout(self, fourcc: Optional[str] = None) -> StreamXout:
if self.encoder is not None and fourcc is not None:
return StreamXout(self.encoder.bitstream, name=self.name or self._source + '_bitstream')
elif self.is_replay():
def get_stream_xout(self) -> StreamXout:
if self.is_replay():
return ReplayStream(self.name or self._source)
elif self.is_mono():
return StreamXout(self.stream, name=self.name or self._source + '_mono')
Expand All @@ -464,32 +413,22 @@ def set_num_frames_pool(self, num_frames: int, preview_num_frames: Optional[int]
if preview_num_frames is not None:
self._preview_num_frames_pool = preview_num_frames

def get_fourcc(self) -> Optional[str]:
    """Return the fourcc for the encoder profile, or None without an encoder.

    The value comes from ``encoder_profile_to_fourcc`` applied to
    ``self._encoder_profile``.
    """
    if self.encoder is not None:
        return encoder_profile_to_fourcc(self._encoder_profile)
    return None

"""
Available outputs (to the host) of this component
"""

class Out:
class CameraOut(ComponentOutput):
def __call__(self, device: dai.Device, fourcc: Optional[str] = None) -> XoutBase:
return XoutFrames(self._comp.get_stream_xout(fourcc), fourcc).set_comp_out(self)
def __call__(self, device: dai.Device) -> XoutBase:
return XoutFrames(self._comp.get_stream_xout()).set_comp_out(self)

class ReplayOut(ComponentOutput):
def __call__(self, device: dai.Device) -> XoutBase:
return XoutFrames(ReplayStream(self._comp._source)).set_comp_out(self)

class EncodedOut(CameraOut):
def __call__(self, device: dai.Device) -> XoutBase:
return super().__call__(device, fourcc=self._comp.get_fourcc())


def __init__(self, camera_component: 'CameraComponent'):
self.replay = self.ReplayOut(camera_component)
self.camera = self.CameraOut(camera_component)
self.encoded = self.EncodedOut(camera_component)

self.main = self.replay if camera_component.is_replay() else self.camera
Loading