diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b5ec64d..5838eb3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,6 +22,11 @@ repos: hooks: - id: basedpyright + - repo: https://github.com/kynan/nbstripout + rev: 0.8.1 + hooks: + - id: nbstripout + - repo: local hooks: - id: just-format diff --git a/examples/.gitattributes b/examples/.gitattributes deleted file mode 100644 index 0b15a56..0000000 --- a/examples/.gitattributes +++ /dev/null @@ -1 +0,0 @@ -*.ipynb filter=jupyter-nbconvert-clear-output diff --git a/examples/nuscenes_mcap.ipynb b/examples/nuscenes_mcap.ipynb index 4b872a3..efb3f60 100644 --- a/examples/nuscenes_mcap.ipynb +++ b/examples/nuscenes_mcap.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f199a6c2-62e9-47c3-b5c3-18c10fab78d7", + "id": "0", "metadata": {}, "source": [ "# [nuScenes](https://nuscenes.org) x [mcap](https://mcap.dev)\n", @@ -13,7 +13,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8372e8fa-2170-4f5e-8da9-ae91e9382169", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "5722b10d-2dd9-4e9f-8e03-1cafe223f0aa", + "id": "2", "metadata": {}, "source": [ "#### Compose a [hydra](https://hydra.cc) config:" @@ -34,7 +34,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e297ae3-1a7e-46e4-94ca-3da749209c8c", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "markdown", - "id": "83e29131-0392-4a39-9ab9-e2745d59fefa", + "id": "4", "metadata": {}, "source": [ "#### Instantiate the dataloader:" @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df270b3c-f637-48ad-9acf-89d8c5cf892e", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "markdown", - "id": "e14b7f76-fb2e-46b4-903f-0f5ed32aee1c", + "id": "6", "metadata": {}, "source": [ "#### Inspect a batch:" @@ -79,7 +79,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9bfe7401-76fb-46d6-83a1-d511eed774c4", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "markdown", - "id": "f5225adf-da67-4626-86c6-beae09974989", + "id": "8", "metadata": {}, "source": [ "#### (optional) Visualize the dataset:" @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d5eaad36-97fd-4b60-8c67-df5eb9977a52", + "id": "9", "metadata": { "editable": true, "slideshow": { @@ -135,7 +135,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.4" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/examples/nuscenes_rrd.ipynb b/examples/nuscenes_rrd.ipynb index 2f5b7c9..470fefb 100644 --- a/examples/nuscenes_rrd.ipynb +++ b/examples/nuscenes_rrd.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f199a6c2-62e9-47c3-b5c3-18c10fab78d7", + "id": "0", "metadata": {}, "source": [ "# [nuScenes](https://nuscenes.org) x [rrd](https://github.com/rerun-io/rerun/blob/main/ARCHITECTURE.md#rrd-files)\n", @@ -13,7 +13,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8372e8fa-2170-4f5e-8da9-ae91e9382169", + "id": "1", "metadata": {}, "outputs": [], "source": [ @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "5722b10d-2dd9-4e9f-8e03-1cafe223f0aa", + "id": "2", "metadata": {}, "source": [ "#### Compose a [hydra](https://hydra.cc) config:" @@ -34,7 +34,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5e297ae3-1a7e-46e4-94ca-3da749209c8c", + "id": "3", "metadata": {}, "outputs": [], "source": [ @@ -52,7 +52,7 @@ }, { "cell_type": "markdown", - "id": "83e29131-0392-4a39-9ab9-e2745d59fefa", + "id": "4", "metadata": {}, "source": [ "#### Instantiate the dataloader:" @@ -61,7 +61,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df270b3c-f637-48ad-9acf-89d8c5cf892e", + "id": "5", "metadata": {}, "outputs": [], "source": [ @@ -70,7 +70,7 @@ }, { "cell_type": "markdown", - "id": "e14b7f76-fb2e-46b4-903f-0f5ed32aee1c", + "id": "6", "metadata": {}, "source": [ "#### Inspect a batch:" @@ -79,7 +79,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9bfe7401-76fb-46d6-83a1-d511eed774c4", + "id": "7", "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "markdown", - "id": "7314432a-0e5b-4134-be16-9e0bd2f12581", + "id": "8", "metadata": {}, "source": [ "#### (optional) Visualize the dataset:" @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d5eaad36-97fd-4b60-8c67-df5eb9977a52", + "id": "9", "metadata": { "editable": true, "slideshow": { @@ -118,7 +118,7 @@ "rr.notebook_show(height=1000, width=2000)\n", "\n", "# log the original recording for comparison\n", - "rr.log_file_from_path(\"../tests/data/nuscenes.rrd\")" + "rr.log_file_from_path(\"../tests/data/nuscenes/rrd/nuscenes_dataset.rrd\")" ] } ], @@ -138,7 +138,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.4" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/justfile b/justfile index f4bd0c8..68c8102 100644 --- a/justfile +++ b/justfile @@ -46,7 +46,7 @@ generate-config: --output yaml \ --strict -test *ARGS: generate-config +test *ARGS: build-protos generate-config uv run --all-extras pytest --capture=no {{ ARGS }} notebook FILE *ARGS: sync generate-config diff --git a/pyproject.toml b/pyproject.toml index afb9fc7..c36caa5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "rbyte" -version = "0.12.0" +version = "0.12.1" description = "Multimodal PyTorch dataset library" authors = [{ name = "Evgenii Gorchakov", email = "evgenii@yaak.ai" }] maintainers = [{ name = "Evgenii Gorchakov", email = "evgenii@yaak.ai" }] @@ -8,17 +8,16 @@ dependencies = [ "tensordict>=0.7.0", "torch", "numpy", - "polars>=1.21.0", - "pydantic>=2.10.2", - "more-itertools>=10.5.0", + "polars>=1.22.0", + "pydantic>=2.10.6", + "more-itertools>=10.6.0", "hydra-core>=1.3.2", - "optree>=0.13.1", - "cachetools>=5.5.0", - "diskcache>=5.6.3", + "optree>=0.14.0", + "cachetools>=5.5.1", "parse>=1.20.2", - "structlog>=24.4.0", - "tqdm>=4.66.5", - "pipefunc>=0.53.0", + "structlog>=25.1.0", + "tqdm>=4.67.1", + "pipefunc>=0.53.3", "xxhash>=3.5.0", ] readme = "README.md" @@ -38,32 +37,29 @@ classifiers = [ repo = "https://github.com/yaak-ai/rbyte" [project.optional-dependencies] -build = ["hatchling>=1.25.0", "grpcio-tools>=1.62.0", "protoletariat==3.2.19"] -visualize = ["rerun-sdk[notebook]==0.21.0"] +build = ["hatchling>=1.27.0"] +protos = ["grpcio-tools>=1.70.0", "protoletariat>=3.3.9"] +visualize = ["rerun-sdk[notebook]>=0.22.0"] mcap = [ - "mcap>=1.2.1", + "mcap>=1.2.2", "mcap-ros2-support>=0.5.5", "protobuf", - "mcap-protobuf-support>=0.5.2", + "mcap-protobuf-support>=0.5.3", ] yaak = ["protobuf", "ptars>=0.0.3"] -jpeg = ["simplejpeg>=1.7.6"] +jpeg = ["simplejpeg>=1.8.1"] video = [ - "python-vali>=4.2.0.post0; sys_platform == 'linux'", - "video-reader-rs>=0.2.2", + "python-vali>=4.2.9.post1; sys_platform == 'linux'", + "video-reader-rs>=0.2.3", ] hdf5 = ["h5py>=3.12.1"] -rrd = ["rerun-sdk==0.21.0", "pyarrow-stubs"] +rrd = ["rerun-sdk>=0.22.0", "pyarrow-stubs"] [project.scripts] rbyte-visualize = 'rbyte.scripts.visualize:main' [build-system] -requires = [ - "hatchling>=1.25.0", - "grpcio-tools>=1.62.0", - "protoletariat==3.2.19", -] +requires = ["hatchling>=1.27.0"] build-backend = "hatchling.build" [dependency-groups] @@ -91,7 +87,7 @@ artifacts = ["src/rbyte/io/yaak/proto/*_pb2.py*"] [tool.hatch.build.targets.sdist.hooks.custom] enable-by-default = true -require-runtime-features = ["build"] +require-runtime-features = ["protos"] [tool.hatch.build.targets.wheel] packages = ["src/rbyte"] diff --git a/src/rbyte/io/_numpy/tensor_source.py b/src/rbyte/io/_numpy/tensor_source.py index 550c3a3..0e0c8ff 100644 --- a/src/rbyte/io/_numpy/tensor_source.py +++ b/src/rbyte/io/_numpy/tensor_source.py @@ -6,9 +6,7 @@ import numpy as np import torch -from numpy.lib.recfunctions import ( - structured_to_unstructured, # pyright: ignore[reportUnknownVariableType] -) +from numpy.lib.recfunctions import structured_to_unstructured from pydantic import validate_call from torch import Tensor @@ -34,8 +32,8 @@ def _path_posix(self) -> str: def _getitem(self, index: object) -> Tensor: path = self._path_posix.format(index) - array = structured_to_unstructured(np.load(path)[self._select]) # pyright: ignore[reportUnknownVariableType] - return torch.from_numpy(np.ascontiguousarray(array)) # pyright: ignore[reportUnknownMemberType, reportUnknownArgumentType] + array = structured_to_unstructured(np.load(path)[self._select]) + return torch.from_numpy(np.ascontiguousarray(array)) # pyright: ignore[reportUnknownMemberType] @override def __getitem__(self, indexes: object | Iterable[object]) -> Tensor: diff --git a/src/rbyte/io/video/ffmpeg_source.py b/src/rbyte/io/video/ffmpeg_source.py index 64c9c7a..5d23063 100644 --- a/src/rbyte/io/video/ffmpeg_source.py +++ b/src/rbyte/io/video/ffmpeg_source.py @@ -20,6 +20,8 @@ def __init__( path: FilePath, threads: NonNegativeInt | None = None, resize_shorter_side: NonNegativeInt | None = None, + device: str | None = None, + filter: str | None = None, ) -> None: super().__init__() @@ -27,6 +29,8 @@ def __init__( filename=Path(path).resolve().as_posix(), threads=threads, resize_shorter_side=resize_shorter_side, + device=device, + filter=filter, ) @override diff --git a/src/rbyte/io/yaak/idl-repo b/src/rbyte/io/yaak/idl-repo index 6c5ab8c..c9e3d52 160000 --- a/src/rbyte/io/yaak/idl-repo +++ b/src/rbyte/io/yaak/idl-repo @@ -1 +1 @@ -Subproject commit 6c5ab8cc54cae6465a43a462ceff5794de16fb0b +Subproject commit c9e3d5284935ef5a070bb3961a7ee5fd1cdd588c diff --git a/src/rbyte/viz/loggers/rerun_logger.py b/src/rbyte/viz/loggers/rerun_logger.py index 19b7686..4d443b5 100644 --- a/src/rbyte/viz/loggers/rerun_logger.py +++ b/src/rbyte/viz/loggers/rerun_logger.py @@ -3,7 +3,6 @@ from math import prod from typing import Annotated, Any, Protocol, Self, cast, override, runtime_checkable -import more_itertools as mit import numpy as np import numpy.typing as npt import rerun as rr @@ -19,9 +18,9 @@ from pydantic.types import AnyType from rerun._baseclasses import ( Archetype, # noqa: PLC2701 - ComponentBatchLike, + ComponentColumn, ) -from rerun._send_columns import TimeColumnLike # noqa: PLC2701 +from rerun._send_columns import TimeColumnLike as _TimeColumnLike # noqa: PLC2701 from structlog import get_logger from structlog.contextvars import bound_contextvars @@ -34,7 +33,7 @@ @runtime_checkable -class TimeColumn(TimeColumnLike, Protocol): ... +class TimeColumnLike(_TimeColumnLike, Protocol): ... class ImageFormat(BaseModel): @@ -65,7 +64,7 @@ def validate_model(self: Self) -> Self: ] -TimeConfig = RerunImportString[type[TimeColumn]] +TimeConfig = RerunImportString[type[TimeColumnLike]] ComponentConfig = ( RerunImportString[type[Archetype]] @@ -78,16 +77,16 @@ def validate_model(self: Self) -> Self: class Schema(RootModel[Mapping[str, TimeConfig | ComponentConfig]]): @cached_property - def times(self) -> Mapping[str, TimeColumn]: - return {k: v for k, v in self.root.items() if isinstance(v, TimeColumn)} + def indexes(self) -> Mapping[str, TimeColumnLike]: + return {k: v for k, v in self.root.items() if isinstance(v, TimeColumnLike)} @cached_property - def components( + def columns( self, ) -> Mapping[ str, type[Archetype] | Mapping[type[rr.Image | rr.DepthImage], ImageFormat] ]: - return {k: v for k, v in self.root.items() if not isinstance(v, TimeColumn)} # pyright: ignore[reportReturnType] + return {k: v for k, v in self.root.items() if not isinstance(v, TimeColumnLike)} # pyright: ignore[reportReturnType] class RerunLogger(Logger[Batch]): @@ -116,23 +115,23 @@ def _get_recording(self, application_id: str) -> rr.RecordingStream: return recording @classmethod - def _build_components( + def _build_columns( cls, array: npt.NDArray[Any], schema: type[Archetype] | Mapping[type[rr.Image | rr.DepthImage], ImageFormat], - ) -> Iterable[ComponentBatchLike]: + ) -> Iterable[ComponentColumn]: match schema: case rr.Scalar: - return [schema.indicator(), rr.components.ScalarBatch(array)] + return rr.Scalar.columns(scalar=array) case rr.Points3D: match shape := array.shape: case (3,): - batch = rr.components.Position3DBatch(array) + return rr.Points3D.columns(positions=array) case (*batch_dims, n, 3): - batch = rr.components.Position3DBatch( - array.reshape(-1, 3) + return rr.Points3D.columns( + positions=array.reshape(-1, 3) ).partition([n] * prod(batch_dims)) case _: @@ -140,10 +139,8 @@ def _build_components( raise NotImplementedError - return [schema.indicator(), batch] - case rr.Tensor: - return [schema.indicator(), rr.components.TensorDataBatch(array)] + return rr.Tensor.columns(data=array) case {rr.Image: image_format} | {rr.DepthImage: image_format}: with bound_contextvars(image_format=image_format, shape=array.shape): @@ -155,15 +152,15 @@ def _build_components( case None, rr.ColorModel(), (*batch_dims, height, width, _): pass - case rr.PixelFormat.NV12, None, (*batch_dims, batch_dim, width): - height = int(batch_dim / 1.5) + case rr.PixelFormat.NV12, None, (*batch_dims, dim, width): + height = int(dim / 1.5) case _: logger.error("not implemented") raise NotImplementedError - image_format = rr.components.ImageFormat( + format = rr.components.ImageFormat( height=height, width=width, pixel_format=image_format.pixel_format, @@ -173,13 +170,10 @@ def _build_components( batch_dim = prod(batch_dims) - return [ - mit.one(schema).indicator(), - rr.components.ImageFormatBatch([image_format] * batch_dim), - rr.components.ImageBufferBatch( - array.reshape(batch_dim, -1).view(np.uint8) - ), - ] + return rr.Image.columns( + buffer=array.reshape(batch_dim, -1).view(np.uint8), + format=[format] * batch_dim, + ) case _: logger.error("not implemented") @@ -190,25 +184,20 @@ def _build_components( def log(self, batch_idx: int, batch: Batch) -> None: for i, sample in enumerate(batch.data): # pyright: ignore[reportArgumentType, reportUnknownVariableType] with self._get_recording(batch.meta.input_id[i]): # pyright: ignore[reportUnknownArgumentType, reportOptionalSubscript, reportUnknownMemberType, reportOptionalMemberAccess] - times: Sequence[TimeColumn] = [ - column( + indexes: Sequence[TimeColumnLike] = [ + index( timeline=timeline, times=np.atleast_1d(sample.get(timeline).numpy()), # pyright: ignore[reportUnknownArgumentType, reportUnknownMemberType, reportCallIssue] ) - for timeline, column in self._schema.times.items() + for timeline, index in self._schema.indexes.items() ] - for entity_path, schema in self._schema.components.items(): + for entity_path, schema in self._schema.columns.items(): with bound_contextvars(path=entity_path, schema=schema): array = cast( npt.NDArray[Any], sample.get(entity_path).cpu().numpy(), # pyright: ignore[reportUnknownMemberType] ) - components = self._build_components(array, schema) - rr.send_columns( - entity_path=entity_path, - times=times, - components=components, - strict=True, - ) + columns = self._build_columns(array, schema) + rr.send_columns(entity_path, indexes, columns, strict=True)