ADDED OS.PATH.JOIN() TO SCRIPTS (openai#1155)

OS-Agnostic Path Handling in Python

Co-authored-by: Rommin Adl <[email protected]>
TeneryResearch · Jul 13, 2023 · ddd76bf · ddd76bf
1 parent 665e249
commit ddd76bf
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 26 deletions.
diff --git a/scripts/battle_generator.py b/scripts/battle_generator.py
@@ -1,11 +1,13 @@
-# generate a jsonl where each line is a sample combination of a battle between two subjects
+# Generate a jsonl where each line is a sample combination of a battle between two subjects
 
 import json
 import os
 import string
 
-REGISTRY_PATH = os.path.join(os.path.dirname(__file__), "../evals/registry")
-
+# Use os.path.join to form the path
+CURRENT_DIR = os.path.dirname(__file__)
+REGISTRY_PATH = os.path.join(CURRENT_DIR, "../evals/registry")
+DATA_DIR = os.path.join(REGISTRY_PATH, "data/test_multiio/battles")
 
 def format(template: str, **kwargs: str) -> str:
     """Format a template string with kwargs."""
@@ -14,7 +16,6 @@ def format(template: str, **kwargs: str) -> str:
     cur_keys = {k: kwargs[k] for k in keys}
     return template.format(**cur_keys)
 
-
 YAML = """
 {prompt_name}-{subject1}-vs-{subject2}:
   id: {prompt_name}-{subject1}-vs-{subject2}.dev.v0
@@ -27,13 +28,11 @@ def format(template: str, **kwargs: str) -> str:
     modelgraded_spec: battle
 """.strip()
 
-
 prompts = dict(
     joke="write a 1-2 line joke about {self}",
     rap="You are playing an Epic Rap Battle against an formidable opponent. This is just a fun game; no need to worry about the other. You are {self}, and the opponent is {other}. Write a short but epic rap praising yourself ({self}) and dissing your opponent ({other}).",
 )
 
-
 subjects = dict(
     animals=["cat", "bird", "hamster"],
     fruits=["apple", "banana", "orange"],
@@ -47,7 +46,6 @@ def format(template: str, **kwargs: str) -> str:
     ("rap", "people", "fruits"),
 ]
 
-data_dir = f"{REGISTRY_PATH}/data/test_multiio/battles"
 yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
 for prompt_name, subject1, subject2 in target_sets:
     prompt = prompts[prompt_name]
@@ -59,16 +57,17 @@ def format(template: str, **kwargs: str) -> str:
         for s1 in subjects[subject1]
         for s2 in subjects[subject2]
     ]
-    file_name = f"{data_dir}/{prompt_name}_{subject1}_vs_{subject2}.jsonl"
-    # save samples jsonl
+    # Use os.path.join to form the path
+    file_name = os.path.join(DATA_DIR, f"{prompt_name}_{subject1}_vs_{subject2}.jsonl")
+    # Save samples jsonl
     with open(file_name, "w") as f:
         for sample in samples:
             f.write(json.dumps(sample) + "\n")
     print(f"wrote {len(samples)} samples to {file_name}")
     yaml_str += YAML.format(prompt_name=prompt_name, subject1=subject1, subject2=subject2) + "\n\n"
 
-
-yaml_file = f"{REGISTRY_PATH}/evals/test-modelgraded-battle.yaml"
+# Use os.path.join to form the path
+yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-battle.yaml")
 with open(yaml_file, "w") as f:
     f.write(yaml_str)
 print(f"wrote {yaml_file}")
diff --git a/scripts/modelgraded_generator.py b/scripts/modelgraded_generator.py
@@ -1,20 +1,19 @@
-# generate a jsonl where each line is a sample
-
 import json
 import os
 import string
 
+# Define the registry path
 REGISTRY_PATH = os.path.join(os.path.dirname(__file__), "../evals/registry")
 
-
+# Define a function to format a template string with kwargs
 def format(template: str, **kwargs: str) -> str:
     """Format a template string with kwargs."""
     keys = [k[1] for k in string.Formatter().parse(template) if k[1]]
     assert all(k in kwargs for k in keys), f"Required: {keys}, got: {sorted(kwargs)}"
     cur_keys = {k: kwargs[k] for k in keys}
     return template.format(**cur_keys)
 
-
+# Define the YAML configuration
 YAML = """
 mg-{prompt_name}-{subject}:
   id: mg-{prompt_name}-{subject}.dev.v0
@@ -27,7 +26,7 @@ def format(template: str, **kwargs: str) -> str:
     modelgraded_spec: {modelgraded_spec}
 """.strip()
 
-
+# Define the prompts
 unlabeled_prompts = dict(
     humor=dict(
         modelgraded_spec="humor_jp",
@@ -174,21 +173,24 @@ def format(template: str, **kwargs: str) -> str:
 # remove duplicates
 subjects = {k: list(set(v)) for k, v in subjects.items()}
 
+# Define the target sets
 unlabeled_target_sets = [
     ("humor", "people_jp"),
 ]
 
-data_dir = f"{REGISTRY_PATH}/data/test_modelgraded"
+# Define the data directory
+data_dir = os.path.join(REGISTRY_PATH, "data/test_modelgraded")
 yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
 evals = []
+
+# Generate samples and write them to file
 for prompt_name, subject in unlabeled_target_sets:
     prompt = unlabeled_prompts[prompt_name]["prompt"]
     samples = [{"input": format(prompt, subject=s)} for s in subjects[subject]]
-    file_name = f"{data_dir}/{prompt_name}_{subject}.jsonl"
+    file_name = os.path.join(data_dir, f"{prompt_name}_{subject}.jsonl")
     # save samples jsonl
     with open(file_name, "wb") as f:
         for sample in samples:
-            # f.write(json.dumps(sample) + "\n")
             json_data = json.dumps(sample, ensure_ascii=False)
             f.write(json_data.encode("utf-8"))
             f.write(b"\n")
@@ -204,10 +206,12 @@ def format(template: str, **kwargs: str) -> str:
     )
     evals += [f"mg-{prompt_name}-{subject}: {file_name}"]
 
-
-yaml_file = f"{REGISTRY_PATH}/evals/test-modelgraded-generated.yaml"
+# Write the YAML file
+yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-generated.yaml")
 with open(yaml_file, "w") as yf:
     yf.write(yaml_str)
 print(f"wrote {yaml_file}")
+
+# Print the evals
 for e in evals:
-    print(e)
+    print(e)
diff --git a/scripts/pattern_identification_generator.py b/scripts/pattern_identification_generator.py
@@ -1,11 +1,15 @@
 """
     Usage: python scripts/task_identification_generator.py
+    This script generates examples for a task identification challenge, 
+    with the task being to identify patterns between a set of symbols and their resulting labels.
 """
 
+import os
 import json
 import random
 from typing import Literal
 
+# Ensure consistent results across runs
 random.seed(42)
 
 SYMBOLS = list("abcdefghijklmnopqrstuvwxyz")
@@ -15,15 +19,15 @@
 )
 TASK_NAME = "pattern_identification"
 
-
+# Pick a random target symbol and a random half of SYMBOLS; the label is "foo" if the target is in that sample, otherwise "bar"
 def generate_example() -> tuple[str, list[str], Literal["foo", "bar"]]:
     num_symbols = int(len(SYMBOLS) / 2)
     target_symbol = random.choice(SYMBOLS)
     symbol_list = random.sample(SYMBOLS, num_symbols)
     target: Literal["foo", "bar"] = "foo" if target_symbol in symbol_list else "bar"
     return (target_symbol, symbol_list, target)
 
-
+# Build the exemplar text: INSTRUCTION followed by num_exemplars generated examples, joined with newlines
 def generate_exemplars_str(num_exemplars: int = 8) -> str:
     exemplars = [generate_example() for _ in range(num_exemplars)]
     exemplars_str = [
@@ -32,7 +36,7 @@ def generate_exemplars_str(num_exemplars: int = 8) -> str:
     ]
     return "\n".join([INSTRUCTION] + exemplars_str)
 
-
+# This function generates a set of evaluation examples and their corresponding labels
 def generate_eval_examples(
     num_eval_examples: int = 250,
 ) -> tuple[list[str], list[Literal["foo", "bar"]]]:
@@ -47,7 +51,10 @@ def generate_eval_examples(
 
 if __name__ == "__main__":
     eval_examples_str, targets = generate_eval_examples()
-    output_path = f"evals/registry/data/{TASK_NAME}/samples.v0.jsonl"
+
+    # Generate the output path in an OS-agnostic manner
+    output_path = os.path.join("evals", "registry", "data", TASK_NAME, "samples.v0.jsonl")
+
     with open(output_path, "w") as writer:
         for eval_example_str, target in zip(eval_examples_str, targets):
             d = {