Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: bump pipefunc #38

Merged
merged 1 commit into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions config/_templates/dataset/mimicgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/nuscenes/mcap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/nuscenes/rrd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/yaak.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/zod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "rbyte"
version = "0.14.0"
version = "0.14.1"
description = "Multimodal PyTorch dataset library"
authors = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
maintainers = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
Expand All @@ -16,7 +16,7 @@ dependencies = [
"cachetools>=5.5.1",
"structlog>=25.1.0",
"tqdm>=4.67.1",
"pipefunc[autodoc]>=0.56.0",
"pipefunc[autodoc]>=0.57.0",
"xxhash>=3.5.0",
]
readme = "README.md"
Expand Down
21 changes: 6 additions & 15 deletions src/rbyte/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
from concurrent.futures import Executor
from enum import StrEnum, unique
from functools import cache
from pathlib import Path
from typing import Annotated, Any, Literal, override
from typing import Annotated, Any, ClassVar, Literal, override

import polars as pl
import torch
from optree import tree_map, tree_structure, tree_transpose
from pipefunc import Pipeline
from pipefunc._pipeline._types import OUTPUT_TYPE, StorageType
from pipefunc.map import run_map
from pipefunc._pipeline._types import OUTPUT_TYPE
from pydantic import ConfigDict, StringConstraints, validate_call
from structlog import get_logger
from tensordict import TensorDict
Expand Down Expand Up @@ -39,20 +37,14 @@ class SourceConfig(BaseModel):


class PipelineConfig(BaseModel):
model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow")

pipeline: HydraConfig[Pipeline]
inputs: Mapping[str, Any]
run_folder: str | Path | None = None
parallel: bool = True
executor: (
HydraConfig[Executor] | dict[OUTPUT_TYPE, HydraConfig[Executor]] | None
) = None
chunksizes: int | dict[OUTPUT_TYPE, int] | None = None
storage: StorageType = "dict"
persist_memory: bool = True
cleanup: bool = True
fixed_indices: dict[str, int | slice] | None = None
auto_subpipeline: bool = False
show_progress: bool = False
return_results: Literal[True] = True


@unique
Expand Down Expand Up @@ -226,8 +218,7 @@ def _build_samples(cls, samples: PipelineConfig) -> pl.DataFrame:
samples.executor, # pyright: ignore[reportArgumentType]
)

results = run_map(
pipeline=pipeline,
results = pipeline.map(
inputs=inputs,
executor=executor,
**samples.model_dump(exclude={"pipeline", "inputs", "executor"}),
Expand Down
Loading