Skip to content

Commit

Permalink
chore: bump pipefunc
Browse files Browse the repository at this point in the history
  • Loading branch information
egorchakov committed Mar 4, 2025
1 parent a425cdf commit 0b30a7f
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 17 deletions.
1 change: 1 addition & 0 deletions config/_templates/dataset/mimicgen.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/nuscenes/mcap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/nuscenes/rrd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/yaak.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
1 change: 1 addition & 0 deletions config/_templates/dataset/zod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ samples:
executor:
_target_: concurrent.futures.ThreadPoolExecutor

storage: dict
pipeline:
_target_: pipefunc.Pipeline
validate_type_annotations: false
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "rbyte"
version = "0.14.0"
version = "0.14.1"
description = "Multimodal PyTorch dataset library"
authors = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
maintainers = [{ name = "Evgenii Gorchakov", email = "[email protected]" }]
Expand All @@ -16,7 +16,7 @@ dependencies = [
"cachetools>=5.5.1",
"structlog>=25.1.0",
"tqdm>=4.67.1",
"pipefunc[autodoc]>=0.56.0",
"pipefunc[autodoc]>=0.57.0",
"xxhash>=3.5.0",
]
readme = "README.md"
Expand Down
21 changes: 6 additions & 15 deletions src/rbyte/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
from concurrent.futures import Executor
from enum import StrEnum, unique
from functools import cache
from pathlib import Path
from typing import Annotated, Any, Literal, override
from typing import Annotated, Any, ClassVar, Literal, override

import polars as pl
import torch
from optree import tree_map, tree_structure, tree_transpose
from pipefunc import Pipeline
from pipefunc._pipeline._types import OUTPUT_TYPE, StorageType
from pipefunc.map import run_map
from pipefunc._pipeline._types import OUTPUT_TYPE
from pydantic import ConfigDict, StringConstraints, validate_call
from structlog import get_logger
from tensordict import TensorDict
Expand Down Expand Up @@ -39,20 +37,14 @@ class SourceConfig(BaseModel):


class PipelineConfig(BaseModel):
model_config: ClassVar[ConfigDict] = ConfigDict(extra="allow")

pipeline: HydraConfig[Pipeline]
inputs: Mapping[str, Any]
run_folder: str | Path | None = None
parallel: bool = True
executor: (
HydraConfig[Executor] | dict[OUTPUT_TYPE, HydraConfig[Executor]] | None
) = None
chunksizes: int | dict[OUTPUT_TYPE, int] | None = None
storage: StorageType = "dict"
persist_memory: bool = True
cleanup: bool = True
fixed_indices: dict[str, int | slice] | None = None
auto_subpipeline: bool = False
show_progress: bool = False
return_results: Literal[True] = True


@unique
Expand Down Expand Up @@ -226,8 +218,7 @@ def _build_samples(cls, samples: PipelineConfig) -> pl.DataFrame:
samples.executor, # pyright: ignore[reportArgumentType]
)

results = run_map(
pipeline=pipeline,
results = pipeline.map(
inputs=inputs,
executor=executor,
**samples.model_dump(exclude={"pipeline", "inputs", "executor"}),
Expand Down

0 comments on commit 0b30a7f

Please sign in to comment.