Skip to content

Commit

Permalink
remove builder file and make tests pass
Browse files: browse the repository at this point in the history.
  • Loading branch information
shreyashankar committed Jan 10, 2025
1 parent fffe345 commit 6f682d6
Show file tree
Hide file tree
Showing 7 changed files with 18 additions and 37 deletions.
2 changes: 1 addition & 1 deletion docetl/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
__version__ = "0.2.1"

from docetl.runner import DSLRunner
from docetl.builder import Optimizer
from docetl.optimizer import Optimizer

__all__ = ["DSLRunner", "Optimizer"]
17 changes: 9 additions & 8 deletions docetl/containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,18 +564,19 @@ def next(
is_build, sample_size_needed
)

# Print step logs
# Print step logs only if not building
self.runner.datasets[self.name.split("/")[0]] = Dataset(
self, "memory", output_data
)
flush_cache(self.runner.console)
self.runner.console.log(
Panel.fit(
step_logs
+ f"Step [cyan]{self.name}[/cyan] completed. Cost: [green]${step_cost:.2f}[/green]",
title=f"[bold blue]Step Execution: {self.name}[/bold blue]",
if not is_build:
flush_cache(self.runner.console)
self.runner.console.log(
Panel.fit(
step_logs
+ f"Step [cyan]{self.name}[/cyan] completed. Cost: [green]${step_cost:.2f}[/green]",
title=f"[bold blue]Step Execution: {self.name}[/bold blue]",
)
)
)

return output_data, 0, ""

Expand Down
11 changes: 4 additions & 7 deletions docetl/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,14 +426,15 @@ def optimize(self) -> float:
else:
self._insert_empty_resolve_operations()

# Print the query plan
self.runner.print_query_plan()

# Start with the last operation container and visit each child
self.runner.last_op_container.optimize()

flush_cache(self.console)

# Print the query plan
self.console.rule("[bold cyan]Optimized Query Plan[/bold cyan]")
self.runner.print_query_plan()

return self.llm_client.total_cost

def _optimize_equijoin(
Expand Down Expand Up @@ -712,10 +713,6 @@ def save_optimized_config(self, optimized_config_path: str):
Saves the optimized configuration to a YAML file after resolving all references
and cleaning up internal optimization artifacts.
"""
# Print the optimized query plan
self.console.rule("[bold cyan]Optimized Query Plan[/bold cyan]")
self.runner.print_query_plan()

resolved_config = self.clean_optimized_config()

with open(optimized_config_path, "w") as f:
Expand Down
20 changes: 2 additions & 18 deletions tests/basic/test_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pytest
import json
import shutil
from docetl.builder import Optimizer
from docetl.runner import DSLRunner

@pytest.fixture
Expand Down Expand Up @@ -71,22 +70,13 @@ def runner(test_config):
def test_optimize_map_operation(runner, test_dir):
"""Test that the optimizer can optimize a simple map operation"""

# Initialize optimizer
optimizer = Optimizer(
runner=runner,
model="gpt-4o-mini",
timeout=30
)

# Run optimization
total_cost = optimizer.optimize()
optimized_config, total_cost = runner.optimize()

# Check that optimization completed successfully
assert total_cost >= 0 # Cost should be non-negative

# Get the optimized config
optimized_config = optimizer.clean_optimized_config()

# Check that the optimized config contains operations
assert "operations" in optimized_config
assert len(optimized_config["operations"]) > 0
Expand All @@ -101,10 +91,4 @@ def test_optimize_map_operation(runner, test_dir):
assert first_step["name"] == "name_extraction"
assert "operations" in first_step
assert len(first_step["operations"]) > 0

# Save the optimized config
output_path = test_dir / "optimized_config.yaml"
optimizer.save_optimized_config(str(output_path))

# Check that the file was created
assert output_path.exists()

2 changes: 1 addition & 1 deletion tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ def test_pipeline_optimization(
)

assert isinstance(optimized_pipeline, Pipeline)
assert len(optimized_pipeline.operations) == len(pipeline.operations)
assert len(optimized_pipeline.operations) == len(pipeline.operations) + 1
assert len(optimized_pipeline.steps) == len(pipeline.steps)


Expand Down
2 changes: 1 addition & 1 deletion tests/test_synth_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
import tempfile
import os
from docetl.builder import Optimizer

from docetl.runner import DSLRunner
from docetl.operations.split import SplitOperation
from docetl.operations.map import MapOperation
Expand Down
1 change: 0 additions & 1 deletion tests/test_synth_resolve.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import tempfile
import os
from docetl.builder import Optimizer


@pytest.fixture
Expand Down

0 comments on commit 6f682d6

Please sign in to comment.