-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add VALI-based (GPU) video reader
- Loading branch information
1 parent
aed93a1
commit 8a1d8bb
Showing
12 changed files
with
237 additions
and
69 deletions.
There are no files selected for viewing
2 changes: 1 addition & 1 deletion
2
.../config_templates/frame_reader/video.yaml → ..._templates/frame_reader/video/ffmpeg.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
_target_: rbyte.io.frame.ValiGpuFrameReader | ||
_convert_: all | ||
path: ??? | ||
pixel_format_chain: [NV12] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
[project] | ||
name = "rbyte" | ||
version = "0.3.0" | ||
version = "0.4.0" | ||
description = "Multimodal dataset library" | ||
authors = [{ name = "Evgenii Gorchakov", email = "[email protected]" }] | ||
maintainers = [{ name = "Evgenii Gorchakov", email = "[email protected]" }] | ||
|
@@ -44,7 +44,7 @@ mcap = [ | |
] | ||
yaak = ["protobuf", "ptars>=0.0.2"] | ||
jpeg = ["simplejpeg>=1.7.6"] | ||
video = ["video-reader-rs>=0.1.4"] | ||
video = ["python-vali>=4.2.0.post0", "video-reader-rs>=0.1.5"] | ||
hdf5 = ["h5py>=3.12.1"] | ||
|
||
[project.scripts] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,17 @@ | ||
from .reader import VideoFrameReader | ||
__all__: list[str] = [] | ||
|
||
__all__ = ["VideoFrameReader"] | ||
try: | ||
from .ffmpeg_reader import FfmpegFrameReader | ||
except ImportError: | ||
pass | ||
|
||
else: | ||
__all__ += ["FfmpegFrameReader"] | ||
|
||
try: | ||
from .vali_reader import ValiGpuFrameReader | ||
except ImportError: | ||
pass | ||
|
||
else: | ||
__all__ += ["ValiGpuFrameReader"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
from collections.abc import Iterable, Mapping, Sequence | ||
from functools import cached_property | ||
from itertools import pairwise | ||
from typing import Annotated, override | ||
|
||
import more_itertools as mit | ||
import python_vali as vali | ||
import torch | ||
from jaxtyping import Shaped | ||
from pydantic import ( | ||
BeforeValidator, | ||
ConfigDict, | ||
FilePath, | ||
NonNegativeInt, | ||
validate_call, | ||
) | ||
from structlog import get_logger | ||
from torch import Tensor | ||
|
||
from rbyte.io.frame.base import FrameReader | ||
|
||
logger = get_logger(__name__)


def _to_pixel_format(value: object) -> vali.PixelFormat:
    """Coerce a configuration value into a ``vali.PixelFormat`` member.

    Members are passed through unchanged; strings are looked up by attribute
    name on ``vali.PixelFormat`` (e.g. ``"NV12"``).

    Raises:
        ValueError: if ``value`` is neither a member nor the name of one.
            ``ValueError`` is used deliberately: pydantic turns it into a
            regular validation error, whereas the ``AttributeError`` /
            ``TypeError`` of a bare ``getattr`` would propagate unwrapped.
    """
    if isinstance(value, vali.PixelFormat):
        return value

    # Only strings can name a member; the isinstance check below also rejects
    # attributes of the enum class that are not members themselves.
    member = getattr(vali.PixelFormat, value, None) if isinstance(value, str) else None
    if not isinstance(member, vali.PixelFormat):
        msg = f"unknown pixel format: {value!r}"
        raise ValueError(msg)

    return member


# Pixel format that may be given either as a member or by its name.
PixelFormat = Annotated[vali.PixelFormat, BeforeValidator(_to_pixel_format)]
|
||
|
||
class ValiGpuFrameReader(FrameReader):
    """Frame reader backed by a VALI decoder bound to a given GPU id.

    A frame is decoded into a surface of the decoder's own pixel format and
    then converted step by step along the pixel format chain; the surface of
    the last format in the chain is returned as a tensor.
    """

    @validate_call(config=ConfigDict(arbitrary_types_allowed=True))
    def __init__(
        self,
        path: FilePath,
        gpu_id: NonNegativeInt = 0,
        pixel_format_chain: tuple[PixelFormat, ...] = (
            vali.PixelFormat.RGB,
            vali.PixelFormat.RGB_PLANAR,
        ),
    ) -> None:
        """Open a decoder for ``path`` and fix the conversion chain.

        Args:
            path: video file to decode.
            gpu_id: id passed to the decoder, converters and surfaces.
            pixel_format_chain: formats to convert through, in order. The
                decoder's own format is prepended unless the chain already
                starts with it.
        """
        super().__init__()

        self._gpu_id = gpu_id
        self._decoder = vali.PyDecoder(
            input=path.resolve().as_posix(), opts={}, gpu_id=self._gpu_id
        )

        # The chain must start at the format the decoder actually produces.
        decoder_format = self._decoder.Format
        if mit.first(pixel_format_chain, default=None) != decoder_format:
            self._pixel_format_chain = (decoder_format, *pixel_format_chain)
        else:
            self._pixel_format_chain = pixel_format_chain

    @cached_property
    def _surface_converters(
        self,
    ) -> Mapping[tuple[vali.PixelFormat, vali.PixelFormat], vali.PySurfaceConverter]:
        """One converter per consecutive (source, destination) format pair."""
        converters: dict[
            tuple[vali.PixelFormat, vali.PixelFormat], vali.PySurfaceConverter
        ] = {}
        for format_pair in pairwise(self._pixel_format_chain):
            source_format, target_format = format_pair
            converters[format_pair] = vali.PySurfaceConverter(
                src_format=source_format,
                dst_format=target_format,
                gpu_id=self._gpu_id,
            )

        return converters

    @cached_property
    def _surfaces(self) -> Mapping[vali.PixelFormat, vali.Surface]:
        """One surface per format in the chain, sized to the decoder's frames."""
        surfaces: dict[vali.PixelFormat, vali.Surface] = {}
        for fmt in self._pixel_format_chain:
            surfaces[fmt] = vali.Surface.Make(
                format=fmt,
                width=self._decoder.Width,
                height=self._decoder.Height,
                gpu_id=self._gpu_id,
            )

        return surfaces

    def _read(self, index: int) -> Shaped[Tensor, "c h w"] | Shaped[Tensor, "h w c"]:
        """Decode the frame at ``index`` and run it through the format chain.

        Raises:
            RuntimeError: if decoding or any conversion step reports failure.
        """
        seek_ctx = vali.SeekContext(seek_frame=index)
        decoded_surface = self._surfaces[self._decoder.Format]
        success, details = self._decoder.DecodeSingleSurface(  # pyright: ignore[reportUnknownMemberType]
            decoded_surface, seek_ctx
        )
        if not success:
            msg = "failed to decode surface"
            logger.error(msg, details=details)

            raise RuntimeError(msg)

        for (src_format, dst_format), converter in self._surface_converters.items():
            src = self._surfaces[src_format]
            dst = self._surfaces[dst_format]
            success, details = converter.Run(src, dst)  # pyright: ignore[reportUnknownMemberType]
            if not success:
                msg = "failed to convert surface"
                logger.error(msg, src=src, dst=dst, details=details)

                raise RuntimeError(msg)

        final_surface = self._surfaces[self._pixel_format_chain[-1]]

        # The surfaces are reused across calls, so hand out a copy of the data.
        tensor = torch.from_dlpack(final_surface)  # pyright: ignore[reportPrivateImportUsage]

        return tensor.clone().detach()

    @override
    def read(
        self, indexes: Iterable[int]
    ) -> Shaped[Tensor, "b h w c"] | Shaped[Tensor, "b c h w"]:
        """Read the frames at ``indexes`` and stack them along a new first axis."""
        frames = [self._read(frame_index) for frame_index in indexes]

        return torch.stack(frames)

    @override
    def get_available_indexes(self) -> Sequence[int]:
        """Return ``range(n)`` where ``n`` is the decoder's reported frame count."""
        frame_count = self._decoder.NumFrames

        return range(frame_count)
Oops, something went wrong.