From c6f94913cd0d3552c38b4132018294b0d1c7ad27 Mon Sep 17 00:00:00 2001 From: Shreya Shankar Date: Wed, 9 Oct 2024 17:09:45 -0700 Subject: [PATCH] fix: handle azure gpt-4o-mini and output to csv --- docetl/operations/utils.py | 6 +++--- docetl/runner.py | 26 +++++++++++++++++++++----- poetry.lock | 14 +++++++------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/docetl/operations/utils.py b/docetl/operations/utils.py index f4ce3c48..163015ed 100644 --- a/docetl/operations/utils.py +++ b/docetl/operations/utils.py @@ -615,7 +615,7 @@ def call_llm_with_cache( len(props) == 1 and list(props.values())[0].get("type") == "string" and scratchpad is None - and "ollama" in model + and ("ollama" in model or "azure/gpt-4o-mini" in model) ): use_tools = False @@ -635,7 +635,7 @@ def call_llm_with_cache( "type": "function", "function": { "name": "send_output", - "description": "Send structured output back to the user", + "description": "Send output back to the user", "strict": True, "parameters": parameters, "additionalProperties": False, @@ -858,7 +858,7 @@ def call_llm_with_gleaning( "type": "function", "function": { "name": "send_output", - "description": "Send structured output back to the user", + "description": "Send output back to the user", "strict": True, "parameters": parameters, "additionalProperties": False, diff --git a/docetl/runner.py b/docetl/runner.py index c81e13dd..6be5da4d 100644 --- a/docetl/runner.py +++ b/docetl/runner.py @@ -54,13 +54,16 @@ def __init__(self, config: Dict, max_threads: int = None): # Check if output path is correctly formatted as JSON output_path = self.config.get("pipeline", {}).get("output", {}).get("path") if output_path: - if not output_path.lower().endswith(".json"): + if not ( + output_path.lower().endswith(".json") + or output_path.lower().endswith(".csv") + ): raise ValueError( - f"Output path '{output_path}' is not a JSON file. Please provide a path ending with '.json'." + f"Output path '{output_path}' is not a JSON or CSV file. Please provide a path ending with '.json' or '.csv'." ) else: raise ValueError( - "No output path specified in the configuration. Please provide an output path ending with '.json' in the configuration." + "No output path specified in the configuration. Please provide an output path ending with '.json' or '.csv' in the configuration." ) self.syntax_check() @@ -77,6 +80,11 @@ def __init__(self, config: Dict, max_threads: int = None): all_ops_until_and_including_current = [ op_map[prev_op] for prev_op in step["operations"][:idx] ] + [op_map[op_name]] + # If there's no model in the op, add the default model + for op in all_ops_until_and_including_current: + if "model" not in op: + op["model"] = self.default_model + all_ops_str = json.dumps(all_ops_until_and_including_current) self.step_op_hashes[step["name"]][op_name] = hashlib.sha256( all_ops_str.encode() @@ -207,8 +215,16 @@ def save_output(self, data: List[Dict]): self.console.rule("[cyan]Saving Output[/cyan]") output_config = self.config["pipeline"]["output"] if output_config["type"] == "file": - with open(output_config["path"], "w") as file: - json.dump(data, file, indent=2) + if output_config["path"].lower().endswith(".json"): + with open(output_config["path"], "w") as file: + json.dump(data, file, indent=2) + else: # CSV + import csv + + with open(output_config["path"], "w", newline="") as file: + writer = csv.DictWriter(file, fieldnames=data[0].keys()) + writer.writeheader() + writer.writerows(data) self.console.print( f"[green italic]💾 Output saved to {output_config['path']}[/green italic]" ) diff --git a/poetry.lock b/poetry.lock index 8eedfc0d..a5ae584d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1534,13 +1534,13 @@ requests = ">=2.20" [[package]] name = "litellm" -version = "1.48.10" +version = "1.49.0" description = "Library to easily interface with LLM API providers" optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.48.10-py3-none-any.whl", hash = "sha256:752efd59747a0895f4695d025c66f0b2258d80a61175f7cfa41dbe4894ef95e1"}, - {file = "litellm-1.48.10.tar.gz", hash = "sha256:0a4ff75da78e66baeae0658ad8de498298310a5efda74c3d840ce2b013e8401d"}, + {file = "litellm-1.49.0-py3-none-any.whl", hash = "sha256:53711018b730f8a4262c11461b702b771e46e0c974f9c0bcd5b384b027308dd5"}, + {file = "litellm-1.49.0.tar.gz", hash = "sha256:f5ef51b571b14de318fccdd6728f4e705aad68250f9ed374c7fe6c4e95d6c008"}, ] [package.dependencies] @@ -1549,7 +1549,7 @@ click = "*" importlib-metadata = ">=6.8.0" jinja2 = ">=3.1.2,<4.0.0" jsonschema = ">=4.22.0,<5.0.0" -openai = ">=1.45.0" +openai = ">=1.51.0" pydantic = ">=2.0.0,<3.0.0" python-dotenv = ">=0.2.0" requests = ">=2.31.0,<3.0.0" @@ -2330,13 +2330,13 @@ files = [ [[package]] name = "openai" -version = "1.50.2" +version = "1.51.2" description = "The official Python library for the openai API" optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.50.2-py3-none-any.whl", hash = "sha256:822dd2051baa3393d0d5406990611975dd6f533020dc9375a34d4fe67e8b75f7"}, - {file = "openai-1.50.2.tar.gz", hash = "sha256:3987ae027152fc8bea745d60b02c8f4c4a76e1b5c70e73565fa556db6f78c9e6"}, + {file = "openai-1.51.2-py3-none-any.whl", hash = "sha256:5c5954711cba931423e471c37ff22ae0fd3892be9b083eee36459865fbbb83fa"}, + {file = "openai-1.51.2.tar.gz", hash = "sha256:c6a51fac62a1ca9df85a522e462918f6bb6bc51a8897032217e453a0730123a6"}, ] [package.dependencies]