diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b23f43f --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +tests/data/** filter=lfs diff=lfs merge=lfs -text diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a1d0a69..5753901 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -10,9 +10,8 @@ on: workflow_dispatch: jobs: - pre-commit: + ci: runs-on: ubuntu-latest - steps: - name: setup ssh uses: webfactory/ssh-agent@v0.9.0 @@ -24,15 +23,19 @@ jobs: with: submodules: recursive persist-credentials: false + lfs: true + + - name: git lfs checkout + run: git lfs checkout + + - name: setup just + uses: extractions/setup-just@v2 - name: setup python uses: actions/setup-python@v5 with: python-version-file: "pyproject.toml" - - name: setup just - uses: extractions/setup-just@v2 - - name: setup ytt uses: carvel-dev/setup-action@v2 with: @@ -48,14 +51,17 @@ jobs: - name: sync run: just sync - - name: install tools - run: just install-tools + - name: build protos + run: just build-protos - - name: restore pre-commit cache - uses: actions/cache@v4 - with: - path: ~/.cache/pre-commit - key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} + - name: format + run: just format --check + + - name: lint + run: just lint + + - name: typecheck + run: just typecheck - - name: pre-commit - run: just pre-commit + - name: test + run: just test diff --git a/.gitignore b/.gitignore index de04814..301ca75 100644 --- a/.gitignore +++ b/.gitignore @@ -10,7 +10,9 @@ wheels/ .venv # generated -examples/config +config/* +!config/_templates + **/*_pb2.py **/*_pb2.pyi **/outputs/* diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 6a06afa..1be9766 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,4 @@ --- -fail_fast: true - repos: - repo: https://github.com/abravalheri/validate-pyproject rev: v0.21 @@ -17,21 +15,15 @@ repos: hooks: - id: pyupgrade - - repo: https://github.com/google/yamlfmt - rev: v0.13.0 - hooks: - - id: yamlfmt - exclude: examples/config - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.7.0 hooks: - id: ruff args: [--fix] - id: ruff-format - repo: https://github.com/DetachHead/basedpyright-pre-commit-mirror - rev: 1.18.4 + rev: 1.19.0 hooks: - id: basedpyright @@ -44,11 +36,3 @@ repos: entry: just --fmt --unstable pass_filenames: false always_run: true - - - id: generate-example-config - name: generate-example-config - language: system - stages: [pre-commit] - entry: just generate-example-config - pass_filenames: false - always_run: true diff --git a/.yamlfmt.yaml b/.yamlfmt.yaml deleted file mode 100644 index 159329b..0000000 --- a/.yamlfmt.yaml +++ /dev/null @@ -1,8 +0,0 @@ ---- -formatter: - type: basic - indent: 2 - include_document_start: true - retain_line_breaks_single: true - trim_trailing_whitespace: true - eof_newline: true diff --git a/README.md b/README.md index 5c40ff5..f6b51ad 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ uv add https://github.com/yaak-ai/rbyte/releases/latest/download/rbyte-X.Y.Z-py3 ## Examples -See [examples/config_templates](examples/config_templates) ([`ytt`](https://carvel.dev/ytt/) templates) and [justfile](justfile) for usage examples. +See [config/_templates](./config/_templates) ([`ytt`](https://carvel.dev/ytt/) templates) and [justfile](./justfile) for usage examples.
nuScenes x mcap diff --git a/examples/config_templates/build_table.yaml b/config/_templates/build_table.yaml similarity index 100% rename from examples/config_templates/build_table.yaml rename to config/_templates/build_table.yaml diff --git a/config/_templates/dataloader/torch.yaml b/config/_templates/dataloader/torch.yaml new file mode 100644 index 0000000..9839b8b --- /dev/null +++ b/config/_templates/dataloader/torch.yaml @@ -0,0 +1,13 @@ +--- +_target_: torch.utils.data.DataLoader +dataset: ${dataset} +shuffle: false +batch_size: 1 +collate_fn: + _target_: rbyte.utils.dataloader.collate_identity + _partial_: true + +num_workers: 1 +pin_memory: false +persistent_workers: true +multiprocessing_context: forkserver diff --git a/examples/config_templates/dataset/carla.yaml b/config/_templates/dataset/carla.yaml similarity index 99% rename from examples/config_templates/dataset/carla.yaml rename to config/_templates/dataset/carla.yaml index 4f308d5..e565b67 100644 --- a/examples/config_templates/dataset/carla.yaml +++ b/config/_templates/dataset/carla.yaml @@ -105,7 +105,7 @@ inputs: filter: | `control.throttle` > 0.5 - #@ end + #@ end sample_builder: _target_: rbyte.sample.builder.GreedySampleTableBuilder diff --git a/examples/config_templates/dataset/hdf5.yaml b/config/_templates/dataset/mimicgen.yaml similarity index 68% rename from examples/config_templates/dataset/hdf5.yaml rename to config/_templates/dataset/mimicgen.yaml index f15e1c1..680ad25 100644 --- a/examples/config_templates/dataset/hdf5.yaml +++ b/config/_templates/dataset/mimicgen.yaml @@ -1,21 +1,16 @@ -#! https://sites.google.com/view/il-for-mm/datasets#h.cq0r3rd5nr9m +#! https://huggingface.co/datasets/amandlek/mimicgen_datasets/blob/main/source/coffee.hdf5 -#@yaml/map-key-override #@yaml/text-templated-strings #@ inputs = { -#@ "table_setup_from_dishwasher_sample": [ +#@ "coffee": [ #@ "/data/demo_0", #@ "/data/demo_1", -#@ "/data/demo_10", -#@ "/data/demo_101", -#@ "/data/demo_102", #@ ] #@ } #@ frame_keys = [ -#@ '/obs/rgb', -#@ '/obs/depth', +#@ 'obs/agentview_image', #@ ] --- _target_: rbyte.Dataset @@ -30,9 +25,9 @@ inputs: (@=frame_key@): index_column: _idx_ reader: - _target_: rbyte.io.frame.hdf5.Hdf5FrameReader + _target_: rbyte.io.frame.Hdf5FrameReader path: "${data_dir}/(@=input_id@).hdf5" - key: (@=input_key@)(@=frame_key@) + key: (@=input_key@)/(@=frame_key@) #@ end table: @@ -42,13 +37,12 @@ inputs: readers: - path: "${data_dir}/(@=input_id@).hdf5" reader: - _target_: rbyte.io.table.hdf5.Hdf5TableReader + _target_: rbyte.io.table.Hdf5TableReader _recursive_: false fields: (@=input_key@): _idx_: - obs/object: - task_successes: + obs/robot0_eef_pos: merger: _target_: rbyte.io.table.TableConcater diff --git a/examples/config_templates/dataset/mcap.yaml b/config/_templates/dataset/nuscenes.yaml similarity index 91% rename from examples/config_templates/dataset/mcap.yaml rename to config/_templates/dataset/nuscenes.yaml index 4e94a0b..120f7a1 100644 --- a/examples/config_templates/dataset/mcap.yaml +++ b/config/_templates/dataset/nuscenes.yaml @@ -3,7 +3,7 @@ #@yaml/text-templated-strings #@ inputs = [ -#@ 'NuScenes-v1.0-mini-scene-0103', +#@ 'nuScenes-v1.0-mini-scene-0061-cut', #@ ] #@ camera_topics = [ @@ -23,7 +23,7 @@ inputs: (@=topic@): index_column: (@=topic@)/_idx_ reader: - _target_: rbyte.io.frame.mcap.McapFrameReader + _target_: rbyte.io.frame.McapFrameReader path: "${data_dir}/(@=input_id@).mcap" topic: (@=topic@) decoder_factory: mcap_protobuf.decoder.DecoderFactory @@ -42,7 +42,7 @@ inputs: readers: - path: "${data_dir}/(@=input_id@).mcap" reader: - _target_: rbyte.io.table.mcap.McapTableReader + _target_: rbyte.io.table.McapTableReader _recursive_: false decoder_factories: - rbyte.utils.mcap.ProtobufDecoderFactory @@ -78,7 +78,7 @@ inputs: method: ref _idx_: method: asof - tolerance: 10ms + tolerance: 40ms strategy: nearest #@ end @@ -89,7 +89,7 @@ inputs: method: interp filter: | - `/odom/vel.x` >= 8.6 + `/odom/vel.x` >= 8 cache: _target_: rbyte.utils.dataframe.DataframeDiskCache diff --git a/examples/config_templates/dataset/yaak.yaml b/config/_templates/dataset/yaak.yaml similarity index 85% rename from examples/config_templates/dataset/yaak.yaml rename to config/_templates/dataset/yaak.yaml index e8f3427..02b6671 100644 --- a/examples/config_templates/dataset/yaak.yaml +++ b/config/_templates/dataset/yaak.yaml @@ -1,7 +1,7 @@ #@yaml/text-templated-strings #@ drives = [ -#@ 'Niro098-HQ/2024-01-22--09-03-16', +#@ 'Niro098-HQ/2024-06-18--13-39-54', #@ ] #@ cameras = [ @@ -21,16 +21,11 @@ inputs: (@=source_id@): index_column: "ImageMetadata.(@=source_id@).frame_idx" reader: - _target_: rbyte.io.frame.DirectoryFrameReader + _target_: rbyte.io.frame.FfmpegFrameReader _recursive_: true - path: "${data_dir}/(@=input_id@)/frames/(@=source_id@).pii.mp4/576x324/{:09d}.jpg" - frame_decoder: - _target_: simplejpeg.decode_jpeg - _partial_: true - colorspace: rgb - fastdct: true - fastupsample: true - #@ end + path: "${data_dir}/(@=input_id@)/(@=source_id@).pii.mp4" + resize_shorter_side: 324 + #@ end table: builder: @@ -72,7 +67,7 @@ inputs: - path: ${data_dir}/(@=input_id@)/ai.mcap reader: - _target_: rbyte.io.table.mcap.McapTableReader + _target_: rbyte.io.table.McapTableReader _recursive_: false decoder_factories: [rbyte.utils.mcap.ProtobufDecoderFactory] fields: @@ -110,6 +105,7 @@ inputs: gear: method: asof tolerance: 100ms + strategy: nearest /ai/safety_score: clip.end_timestamp: @@ -117,7 +113,7 @@ inputs: score: method: asof - tolerance: 100ms + tolerance: 500ms strategy: nearest filter: | @@ -132,8 +128,8 @@ inputs: sample_builder: _target_: rbyte.sample.builder.GreedySampleTableBuilder index_column: ImageMetadata.(@=cameras[0]@).frame_idx - length: 6 - stride: 10 - min_step: 6 + length: 1 + stride: 1 + min_step: 1 filter: | - array_lower(`VehicleMotion.speed`) > 50 + array_mean(`VehicleMotion.speed`) > 40 diff --git a/examples/config_templates/frame_reader/directory.yaml b/config/_templates/frame_reader/directory.yaml similarity index 100% rename from examples/config_templates/frame_reader/directory.yaml rename to config/_templates/frame_reader/directory.yaml diff --git a/examples/config_templates/frame_reader/hdf5.yaml b/config/_templates/frame_reader/hdf5.yaml similarity index 100% rename from examples/config_templates/frame_reader/hdf5.yaml rename to config/_templates/frame_reader/hdf5.yaml diff --git a/examples/config_templates/frame_reader/mcap.yaml b/config/_templates/frame_reader/mcap.yaml similarity index 84% rename from examples/config_templates/frame_reader/mcap.yaml rename to config/_templates/frame_reader/mcap.yaml index 1dc73e5..f19e906 100644 --- a/examples/config_templates/frame_reader/mcap.yaml +++ b/config/_templates/frame_reader/mcap.yaml @@ -1,5 +1,5 @@ --- -_target_: rbyte.io.frame.mcap.McapFrameReader +_target_: rbyte.io.frame.McapFrameReader _recursive_: true path: ??? topic: ??? diff --git a/examples/config_templates/frame_reader/video/ffmpeg.yaml b/config/_templates/frame_reader/video/ffmpeg.yaml similarity index 100% rename from examples/config_templates/frame_reader/video/ffmpeg.yaml rename to config/_templates/frame_reader/video/ffmpeg.yaml diff --git a/config/_templates/frame_reader/video/vali.yaml b/config/_templates/frame_reader/video/vali.yaml new file mode 100644 index 0000000..b65be70 --- /dev/null +++ b/config/_templates/frame_reader/video/vali.yaml @@ -0,0 +1,5 @@ +--- +_target_: rbyte.io.frame.video.vali_reader.ValiGpuFrameReader +_convert_: all +path: ??? +pixel_format_chain: [NV12] diff --git a/examples/config_templates/logger/console.yaml b/config/_templates/logger/console.yaml similarity index 100% rename from examples/config_templates/logger/console.yaml rename to config/_templates/logger/console.yaml diff --git a/examples/config_templates/logger/rerun/carla.yaml b/config/_templates/logger/rerun/carla.yaml similarity index 99% rename from examples/config_templates/logger/rerun/carla.yaml rename to config/_templates/logger/rerun/carla.yaml index 1204650..51fb3dd 100644 --- a/examples/config_templates/logger/rerun/carla.yaml +++ b/config/_templates/logger/rerun/carla.yaml @@ -3,7 +3,6 @@ #@ cameras = [ #@ 'cam_front_left', #@ ] - --- _target_: rbyte.viz.loggers.RerunLogger _recursive_: true diff --git a/config/_templates/logger/rerun/mimicgen.yaml b/config/_templates/logger/rerun/mimicgen.yaml new file mode 100644 index 0000000..e0093ce --- /dev/null +++ b/config/_templates/logger/rerun/mimicgen.yaml @@ -0,0 +1,11 @@ +--- +_target_: rbyte.viz.loggers.RerunLogger +schema: + frame: + obs/agentview_image: + Image: + color_model: RGB + + table: + _idx_: TimeSequenceColumn + obs/robot0_eef_pos: Points3D diff --git a/examples/config_templates/logger/rerun/mcap.yaml b/config/_templates/logger/rerun/nuscenes.yaml similarity index 100% rename from examples/config_templates/logger/rerun/mcap.yaml rename to config/_templates/logger/rerun/nuscenes.yaml diff --git a/examples/config_templates/logger/rerun/yaak.yaml b/config/_templates/logger/rerun/yaak.yaml similarity index 99% rename from examples/config_templates/logger/rerun/yaak.yaml rename to config/_templates/logger/rerun/yaak.yaml index 28eb05b..ba81bb9 100644 --- a/examples/config_templates/logger/rerun/yaak.yaml +++ b/config/_templates/logger/rerun/yaak.yaml @@ -5,7 +5,6 @@ #@ 'cam_left_backward', #@ 'cam_right_backward', #@ ] - --- _target_: rbyte.viz.loggers.RerunLogger schema: diff --git a/examples/config_templates/read_frames.yaml b/config/_templates/read_frames.yaml similarity index 100% rename from examples/config_templates/read_frames.yaml rename to config/_templates/read_frames.yaml diff --git a/examples/config_templates/table_builder/carla.yaml b/config/_templates/table_builder/carla.yaml similarity index 98% rename from examples/config_templates/table_builder/carla.yaml rename to config/_templates/table_builder/carla.yaml index 9184245..9e29ea4 100644 --- a/examples/config_templates/table_builder/carla.yaml +++ b/config/_templates/table_builder/carla.yaml @@ -24,5 +24,5 @@ merger: _target_: rbyte.io.table.TableConcater method: vertical -filter: | +filter: |- `control.throttle` > 0.5 diff --git a/examples/config_templates/table_builder/hdf5.yaml b/config/_templates/table_builder/hdf5.yaml similarity index 61% rename from examples/config_templates/table_builder/hdf5.yaml rename to config/_templates/table_builder/hdf5.yaml index 4a9562e..5bc81f5 100644 --- a/examples/config_templates/table_builder/hdf5.yaml +++ b/config/_templates/table_builder/hdf5.yaml @@ -4,21 +4,16 @@ _convert_: all readers: - path: ??? reader: - _target_: rbyte.io.table.hdf5.Hdf5TableReader + _target_: rbyte.io.table.Hdf5TableReader _recursive_: false fields: /data/demo_0: _idx_: actions: dones: - obs/gt_nav: - obs/object: - obs/proprio: - obs/proprio_nav: - obs/scan: + obs/robot0_eef_pos: rewards: states: - task_successes: merger: _target_: rbyte.io.table.TableConcater diff --git a/examples/config_templates/table_builder/mcap.yaml b/config/_templates/table_builder/mcap.yaml similarity index 96% rename from examples/config_templates/table_builder/mcap.yaml rename to config/_templates/table_builder/mcap.yaml index bb2343c..2564115 100644 --- a/examples/config_templates/table_builder/mcap.yaml +++ b/config/_templates/table_builder/mcap.yaml @@ -10,7 +10,7 @@ _convert_: all readers: - path: ??? reader: - _target_: rbyte.io.table.mcap.McapTableReader + _target_: rbyte.io.table.McapTableReader _recursive_: false decoder_factories: - rbyte.utils.mcap.ProtobufDecoderFactory diff --git a/examples/config_templates/table_builder/yaak.yaml b/config/_templates/table_builder/yaak.yaml similarity index 97% rename from examples/config_templates/table_builder/yaak.yaml rename to config/_templates/table_builder/yaak.yaml index c08e8a7..fcc3d5e 100644 --- a/examples/config_templates/table_builder/yaak.yaml +++ b/config/_templates/table_builder/yaak.yaml @@ -44,7 +44,7 @@ readers: - path: ??? reader: - _target_: rbyte.io.table.mcap.McapTableReader + _target_: rbyte.io.table.McapTableReader _recursive_: false decoder_factories: [rbyte.utils.mcap.ProtobufDecoderFactory] fields: diff --git a/examples/config_templates/table_writer/console.yaml b/config/_templates/table_writer/console.yaml similarity index 100% rename from examples/config_templates/table_writer/console.yaml rename to config/_templates/table_writer/console.yaml diff --git a/examples/config_templates/table_writer/csv.yaml b/config/_templates/table_writer/csv.yaml similarity index 100% rename from examples/config_templates/table_writer/csv.yaml rename to config/_templates/table_writer/csv.yaml diff --git a/examples/config_templates/table_writer/parquet.yaml b/config/_templates/table_writer/parquet.yaml similarity index 100% rename from examples/config_templates/table_writer/parquet.yaml rename to config/_templates/table_writer/parquet.yaml diff --git a/config/_templates/visualize.yaml b/config/_templates/visualize.yaml new file mode 100644 index 0000000..27d59df --- /dev/null +++ b/config/_templates/visualize.yaml @@ -0,0 +1,11 @@ +--- +defaults: + - /dataloader: torch + - /dataset: !!null + - /logger: !!null + - _self_ + +hydra: + output_subdir: !!null + run: + dir: . diff --git a/examples/config_templates/frame_reader/video/vali.yaml b/examples/config_templates/frame_reader/video/vali.yaml deleted file mode 100644 index af7a629..0000000 --- a/examples/config_templates/frame_reader/video/vali.yaml +++ /dev/null @@ -1,5 +0,0 @@ ---- -_target_: rbyte.io.frame.ValiGpuFrameReader -_convert_: all -path: ??? -pixel_format_chain: [NV12] diff --git a/examples/config_templates/logger/rerun/hdf5.yaml b/examples/config_templates/logger/rerun/hdf5.yaml deleted file mode 100644 index f287149..0000000 --- a/examples/config_templates/logger/rerun/hdf5.yaml +++ /dev/null @@ -1,23 +0,0 @@ -#@yaml/text-templated-strings - -#@ camera_topics = [ -#@ '/CAM_FRONT/image_rect_compressed', -#@ '/CAM_FRONT_LEFT/image_rect_compressed', -#@ '/CAM_FRONT_RIGHT/image_rect_compressed', -#@ ] ---- -_target_: rbyte.viz.loggers.RerunLogger -schema: - frame: - /obs/rgb: - Image: - color_model: RGB - - /obs/depth: - DepthImage: - color_model: L - - table: - _idx_: TimeSequenceColumn - obs/object: Points3D - task_successes: Scalar diff --git a/examples/config_templates/visualize.yaml b/examples/config_templates/visualize.yaml deleted file mode 100644 index d0fdabd..0000000 --- a/examples/config_templates/visualize.yaml +++ /dev/null @@ -1,25 +0,0 @@ ---- -defaults: - - /dataset: !!null - - /logger: !!null - - _self_ - -data_dir: ??? -dataloader: - _target_: torch.utils.data.DataLoader - dataset: ${dataset} - shuffle: false - batch_size: 4 - collate_fn: - _target_: rbyte.utils.dataloader.collate_identity - _partial_: true - - num_workers: 1 - pin_memory: false - persistent_workers: true - multiprocessing_context: forkserver - -hydra: - output_subdir: !!null - run: - dir: . diff --git a/justfile b/justfile index 454061e..0e7f388 100644 --- a/justfile +++ b/justfile @@ -11,7 +11,8 @@ install-tools: for tool in basedpyright ruff pre-commit; do uv tool install --force --upgrade $tool; done setup: sync install-tools - git submodule update --init --recursive --remote + git submodule update --init --recursive --force --remote + git lfs checkout uvx pre-commit install --install-hooks clean: @@ -20,41 +21,53 @@ clean: build: uv build +format *ARGS: + uvx ruff format {{ ARGS }} + +lint *ARGS: + uvx ruff check {{ ARGS }} + +typecheck *ARGS: + uvx basedpyright {{ ARGS }} + build-protos: uvx --from hatch hatch build --clean --hooks-only --target sdist pre-commit *ARGS: build-protos uvx pre-commit run --all-files --color=always {{ ARGS }} -generate-example-config: +generate-config: ytt --ignore-unknown-comments \ - --file {{ justfile_directory() }}/examples/config_templates \ - --output-files examples/config \ + --file {{ justfile_directory() }}/config/_templates \ + --output-files config \ --output yaml \ --strict +test *ARGS: generate-config + uv run pytest --capture=no {{ ARGS }} + [group('scripts')] -visualize *ARGS: generate-example-config +visualize *ARGS: generate-config uv run rbyte-visualize \ - --config-path {{ justfile_directory() }}/examples/config \ + --config-path {{ justfile_directory() }}/config \ --config-name visualize.yaml \ hydra/hydra_logging=disabled \ hydra/job_logging=disabled \ {{ ARGS }} [group('scripts')] -build-table *ARGS: generate-example-config +build-table *ARGS: generate-config uv run rbyte-build-table \ - --config-path {{ justfile_directory() }}/examples/config \ + --config-path {{ justfile_directory() }}/config \ --config-name build_table.yaml \ hydra/hydra_logging=disabled \ hydra/job_logging=disabled \ {{ ARGS }} [group('scripts')] -read-frames *ARGS: generate-example-config +read-frames *ARGS: generate-config uv run rbyte-read-frames \ - --config-path {{ justfile_directory() }}/examples/config \ + --config-path {{ justfile_directory() }}/config \ --config-name read_frames.yaml \ hydra/hydra_logging=disabled \ hydra/job_logging=disabled \ diff --git a/pyproject.toml b/pyproject.toml index 21c6c36..3b51a7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,13 +1,13 @@ [project] name = "rbyte" -version = "0.5.0" +version = "0.5.1" description = "Multimodal dataset library" authors = [{ name = "Evgenii Gorchakov", email = "evgenii@yaak.ai" }] maintainers = [{ name = "Evgenii Gorchakov", email = "evgenii@yaak.ai" }] dependencies = [ "tensordict @ git+https://github.com/pytorch/tensordict.git@85b6b81", "torch>=2.4.1", - "polars>=1.9.0", + "polars>=1.10.0", "pydantic>=2.9.2", "more-itertools>=10.5.0", "hydra-core>=1.3.2", @@ -46,7 +46,7 @@ yaak = ["protobuf", "ptars>=0.0.3"] jpeg = ["simplejpeg>=1.7.6"] video = [ "python-vali>=4.2.0.post0; sys_platform == 'linux'", - "video-reader-rs>=0.1.5", + "video-reader-rs>=0.1.7", ] hdf5 = ["h5py>=3.12.1"] @@ -70,6 +70,7 @@ dev-dependencies = [ "pudb>=2024.1.2", "ipython>=8.28.0", "ipython-autoimport>=0.5", + "pytest>=8.3.3", ] [tool.uv.sources] @@ -125,3 +126,7 @@ ignore = ["D", "CPY", "COM812", "F722", "PD901", "ISC001", "TD"] [tool.ruff.lint.isort] split-on-trailing-comma = false + +[tool.pytest.ini_options] +addopts = ["-ra", "-vv", "--import-mode=importlib"] +testpaths = ["tests"] diff --git a/src/rbyte/io/frame/__init__.py b/src/rbyte/io/frame/__init__.py index b60c385..893da97 100644 --- a/src/rbyte/io/frame/__init__.py +++ b/src/rbyte/io/frame/__init__.py @@ -23,10 +23,3 @@ pass else: __all__ += ["FfmpegFrameReader"] - -try: - from .video.vali_reader import ValiGpuFrameReader -except ImportError: - pass -else: - __all__ += ["ValiGpuFrameReader"] diff --git a/src/rbyte/io/frame/video/__init__.py b/src/rbyte/io/frame/video/__init__.py index 5e2a442..e69de29 100644 --- a/src/rbyte/io/frame/video/__init__.py +++ b/src/rbyte/io/frame/video/__init__.py @@ -1,17 +0,0 @@ -__all__: list[str] = [] - -try: - from .ffmpeg_reader import FfmpegFrameReader -except ImportError: - pass - -else: - __all__ += ["FfmpegFrameReader"] - -try: - from .vali_reader import ValiGpuFrameReader -except ImportError: - pass - -else: - __all__ += ["ValiGpuFrameReader"] diff --git a/src/rbyte/io/table/aligner.py b/src/rbyte/io/table/aligner.py index 0144c0d..6672bfd 100644 --- a/src/rbyte/io/table/aligner.py +++ b/src/rbyte/io/table/aligner.py @@ -7,7 +7,7 @@ import more_itertools as mit import polars as pl -from polars.type_aliases import AsofJoinStrategy +from polars._typing import AsofJoinStrategy from pydantic import StringConstraints, model_validator from structlog import get_logger from xxhash import xxh3_64_intdigest as digest diff --git a/src/rbyte/sample/builder.py b/src/rbyte/sample/builder.py index c8a8fc0..1906ad1 100644 --- a/src/rbyte/sample/builder.py +++ b/src/rbyte/sample/builder.py @@ -57,6 +57,4 @@ def build(self, source: pl.LazyFrame) -> pl.LazyFrame: .sql(f"select * from self where ({self._filter or True})") # noqa: S608 .sort(sample_idx_col) .select(pl.exclude(sample_idx_col)) - # TODO: https://github.com/pola-rs/polars/issues/18810 # noqa: FIX002 - # .select(pl.all().list.to_array(self._length)) ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/data/Niro098-HQ/2024-06-18--13-39-54/ai.mcap b/tests/data/Niro098-HQ/2024-06-18--13-39-54/ai.mcap new file mode 100755 index 0000000..9b8b5c3 --- /dev/null +++ b/tests/data/Niro098-HQ/2024-06-18--13-39-54/ai.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2ac4a592ab3cfa4d5c4c2fcbc2196c5daad82ae0adcb4e45a17dc66c035d9c +size 7542 diff --git a/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_front_left.pii.mp4 b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_front_left.pii.mp4 new file mode 100755 index 0000000..d2ad03b --- /dev/null +++ b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_front_left.pii.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3eee653caeeddb8cfca80bc50ac9057b164c6a00170c68288b2161c51491a9d +size 24848843 diff --git a/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_left_backward.pii.mp4 b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_left_backward.pii.mp4 new file mode 100755 index 0000000..3f5de26 --- /dev/null +++ b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_left_backward.pii.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b4befb6f24b5c98248a21346cb5be7c24a8ee2ca6c4e5663d84d2e736dd1f7 +size 25062933 diff --git a/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_right_backward.pii.mp4 b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_right_backward.pii.mp4 new file mode 100755 index 0000000..05b187b --- /dev/null +++ b/tests/data/Niro098-HQ/2024-06-18--13-39-54/cam_right_backward.pii.mp4 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40962b2f38e6cdf33ebf032ba88ab96e5b3315dca0ddf4dea6366507ea005471 +size 25428903 diff --git a/tests/data/Niro098-HQ/2024-06-18--13-39-54/metadata.log b/tests/data/Niro098-HQ/2024-06-18--13-39-54/metadata.log new file mode 100755 index 0000000..77d8713 --- /dev/null +++ b/tests/data/Niro098-HQ/2024-06-18--13-39-54/metadata.log @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b5bdbcd3999bb82b184783bcc39b0b1596b7bc2c39e43df8abc51f8812be464 +size 460865 diff --git a/tests/data/coffee.hdf5 b/tests/data/coffee.hdf5 new file mode 100644 index 0000000..8eed0b9 --- /dev/null +++ b/tests/data/coffee.hdf5 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d36b22adf480432910bf789bb87f9798c9d785da53619742b9576049c441bc8b +size 20019192 diff --git a/tests/data/nuScenes-v1.0-mini-scene-0061-cut.mcap b/tests/data/nuScenes-v1.0-mini-scene-0061-cut.mcap new file mode 100644 index 0000000..cab8d03 --- /dev/null +++ b/tests/data/nuScenes-v1.0-mini-scene-0061-cut.mcap @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e76b4d2ccf7912d488ad7e69c66201dd4234747b5b9c1dd375a83e0c3f13ce60 +size 25876698 diff --git a/tests/test_dataloader.py b/tests/test_dataloader.py new file mode 100644 index 0000000..f54b1d4 --- /dev/null +++ b/tests/test_dataloader.py @@ -0,0 +1,160 @@ +from pathlib import Path +from types import SimpleNamespace + +from hydra import compose, initialize +from hydra.utils import instantiate +from structlog import get_logger +from torch import Tensor + +logger = get_logger(__name__) + +CONFIG_PATH = "../config" +DATA_DIR = Path(__file__).resolve().parent / "data" + + +def test_mimicgen() -> None: + with initialize(version_base=None, config_path=CONFIG_PATH): + cfg = compose( + "visualize", overrides=["dataset=mimicgen", f"+data_dir={DATA_DIR}"] + ) + + dataloader = instantiate(cfg.dataloader) + + c = SimpleNamespace( + B=cfg.dataloader.batch_size, S=cfg.dataloader.dataset.sample_builder.length + ) + + batch = next(iter(dataloader)) + match batch.to_dict(): + case { + "frame": { + "obs/agentview_image": Tensor(shape=[c.B, c.S, *_]), + **frame_rest, + }, + "table": { + "_idx_": Tensor(shape=[c.B, c.S]), + "obs/robot0_eef_pos": Tensor(shape=[c.B, c.S, *_]), + **table_rest, + }, + "meta": { + "input_id": input_id, + "sample_idx": Tensor(shape=[c.B]), + **meta_rest, + }, + } if set(input_id).issubset(cfg.dataloader.dataset.inputs) and not any(( + frame_rest, + table_rest, + meta_rest, + )): + pass + + case _: + logger.error(msg := "invalid batch structure", batch=batch) + + raise AssertionError(msg) + + +def test_nuscenes() -> None: + with initialize(version_base=None, config_path=CONFIG_PATH): + cfg = compose( + "visualize", overrides=["dataset=nuscenes", f"+data_dir={DATA_DIR}"] + ) + + dataloader = instantiate(cfg.dataloader) + + c = SimpleNamespace( + B=cfg.dataloader.batch_size, S=cfg.dataloader.dataset.sample_builder.length + ) + + batch = next(iter(dataloader)) + match batch.to_dict(): + case { + "frame": { + "/CAM_FRONT/image_rect_compressed": Tensor(shape=[c.B, c.S, *_]), + "/CAM_FRONT_LEFT/image_rect_compressed": Tensor(shape=[c.B, c.S, *_]), + "/CAM_FRONT_RIGHT/image_rect_compressed": Tensor(shape=[c.B, c.S, *_]), + **frame_rest, + }, + "table": { + "/CAM_FRONT/image_rect_compressed/_idx_": Tensor(shape=[c.B, c.S]), + "/CAM_FRONT_LEFT/image_rect_compressed/_idx_": Tensor(shape=[c.B, c.S]), + "/CAM_FRONT_RIGHT/image_rect_compressed/_idx_": Tensor( + shape=[c.B, c.S] + ), + "/CAM_FRONT/image_rect_compressed/log_time": Tensor(shape=[c.B, c.S]), + "/CAM_FRONT_LEFT/image_rect_compressed/log_time": Tensor( + shape=[c.B, c.S] + ), + "/CAM_FRONT_RIGHT/image_rect_compressed/log_time": Tensor( + shape=[c.B, c.S] + ), + "/odom/vel.x": Tensor(shape=[c.B, c.S]), + **table_rest, + }, + "meta": { + "input_id": input_id, + "sample_idx": Tensor(shape=[c.B]), + **meta_rest, + }, + } if set(input_id).issubset(cfg.dataloader.dataset.inputs) and not any(( + frame_rest, + table_rest, + meta_rest, + )): + pass + + case _: + logger.error(msg := "invalid batch structure", batch=batch) + + raise AssertionError(msg) + + +def test_yaak() -> None: + with initialize(version_base=None, config_path=CONFIG_PATH): + cfg = compose("visualize", overrides=["dataset=yaak", f"+data_dir={DATA_DIR}"]) + + dataloader = instantiate(cfg.dataloader) + + c = SimpleNamespace( + B=cfg.dataloader.batch_size, S=cfg.dataloader.dataset.sample_builder.length + ) + + batch = next(iter(dataloader)) + match batch.to_dict(): + case { + "frame": { + "cam_front_left": Tensor(shape=[c.B, c.S, *_]), + "cam_left_backward": Tensor(shape=[c.B, c.S, *_]), + "cam_right_backward": Tensor(shape=[c.B, c.S, *_]), + **frame_rest, + }, + "table": { + "ImageMetadata.cam_front_left.frame_idx": Tensor(shape=[c.B, c.S]), + "ImageMetadata.cam_front_left.time_stamp": Tensor(shape=[c.B, c.S]), + "ImageMetadata.cam_left_backward.frame_idx": Tensor(shape=[c.B, c.S]), + "ImageMetadata.cam_left_backward.time_stamp": Tensor(shape=[c.B, c.S]), + "ImageMetadata.cam_right_backward.frame_idx": Tensor(shape=[c.B, c.S]), + "ImageMetadata.cam_right_backward.time_stamp": Tensor(shape=[c.B, c.S]), + "VehicleMotion.gear": Tensor(shape=[c.B, c.S]), + "VehicleMotion.speed": Tensor(shape=[c.B, c.S]), + "VehicleMotion.time_stamp": Tensor(shape=[c.B, c.S]), + "/ai/safety_score.clip.end_timestamp": Tensor(shape=[c.B, c.S]), + "/ai/safety_score.score": Tensor(shape=[c.B, c.S]), + **table_rest, + }, + "meta": { + "input_id": input_id, + "sample_idx": Tensor(shape=[c.B]), + **meta_rest, + }, + } if set(input_id).issubset(cfg.dataloader.dataset.inputs) and not any(( + frame_rest, + table_rest, + meta_rest, + )): + pass + + case _: + logger.error(msg := "invalid batch structure", batch=batch) + + raise AssertionError(msg)