Skip to content

Commit

Permalink
ADDED OS.PATH.JOIN() TO SCRIPTS (openai#1155)
Browse files Browse the repository at this point in the history
OS-Agnostic Path Handling in Python

---------

Co-authored-by: Rommin Adl <[email protected]>
  • Loading branch information
nickabooch and Rommin Adl authored Jul 13, 2023
1 parent 665e249 commit ddd76bf
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 26 deletions.
21 changes: 10 additions & 11 deletions scripts/battle_generator.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# generate a jsonl where each line is a sample combination of a battle between two subjects
# Generate a jsonl where each line is a sample combination of a battle between two subjects

import json
import os
import string

REGISTRY_PATH = os.path.join(os.path.dirname(__file__), "../evals/registry")

# Use os.path.join to form the path
CURRENT_DIR = os.path.dirname(__file__)
REGISTRY_PATH = os.path.join(CURRENT_DIR, "../evals/registry")
DATA_DIR = os.path.join(REGISTRY_PATH, "data/test_multiio/battles")

def format(template: str, **kwargs: str) -> str:
"""Format a template string with kwargs."""
Expand All @@ -14,7 +16,6 @@ def format(template: str, **kwargs: str) -> str:
cur_keys = {k: kwargs[k] for k in keys}
return template.format(**cur_keys)


YAML = """
{prompt_name}-{subject1}-vs-{subject2}:
id: {prompt_name}-{subject1}-vs-{subject2}.dev.v0
Expand All @@ -27,13 +28,11 @@ def format(template: str, **kwargs: str) -> str:
modelgraded_spec: battle
""".strip()


# Prompt templates keyed by prompt name. Each template uses str.format-style
# placeholders: {self} appears in both, {other} only in "rap".
# NOTE(review): presumably filled per sample via format() in the elided loop
# below -- confirm against the full file.
# NOTE(review): "an formidable" in the rap prompt is a typo; left untouched
# here because editing the prompt text changes the generated samples.
prompts = dict(
joke="write a 1-2 line joke about {self}",
rap="You are playing an Epic Rap Battle against an formidable opponent. This is just a fun game; no need to worry about the other. You are {self}, and the opponent is {other}. Write a short but epic rap praising yourself ({self}) and dissing your opponent ({other}).",
)


subjects = dict(
animals=["cat", "bird", "hamster"],
fruits=["apple", "banana", "orange"],
Expand All @@ -47,7 +46,6 @@ def format(template: str, **kwargs: str) -> str:
("rap", "people", "fruits"),
]

data_dir = f"{REGISTRY_PATH}/data/test_multiio/battles"
yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
for prompt_name, subject1, subject2 in target_sets:
prompt = prompts[prompt_name]
Expand All @@ -59,16 +57,17 @@ def format(template: str, **kwargs: str) -> str:
for s1 in subjects[subject1]
for s2 in subjects[subject2]
]
file_name = f"{data_dir}/{prompt_name}_{subject1}_vs_{subject2}.jsonl"
# save samples jsonl
# Use os.path.join to form the path
file_name = os.path.join(DATA_DIR, f"{prompt_name}_{subject1}_vs_{subject2}.jsonl")
# Save samples jsonl
with open(file_name, "w") as f:
for sample in samples:
f.write(json.dumps(sample) + "\n")
print(f"wrote {len(samples)} samples to {file_name}")
yaml_str += YAML.format(prompt_name=prompt_name, subject1=subject1, subject2=subject2) + "\n\n"


yaml_file = f"{REGISTRY_PATH}/evals/test-modelgraded-battle.yaml"
# Use os.path.join to form the path
yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-battle.yaml")
with open(yaml_file, "w") as f:
f.write(yaml_str)
print(f"wrote {yaml_file}")
26 changes: 15 additions & 11 deletions scripts/modelgraded_generator.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,19 @@
# generate a jsonl where each line is a sample

import json
import os
import string

# Define the registry path
REGISTRY_PATH = os.path.join(os.path.dirname(__file__), "../evals/registry")


# Define a function to format a template string with kwargs
def format(template: str, **kwargs: str) -> str:
    """Format a template string using only the kwargs it references.

    Extra keyword arguments that the template does not mention are ignored,
    so one kwargs set can be shared across templates with different fields.

    Args:
        template: A ``str.format``-style template with named fields.
        **kwargs: Candidate values for the template's named fields.

    Returns:
        The template with every named field substituted.

    Raises:
        AssertionError: If the template names a field missing from kwargs.
    """
    # Formatter.parse yields (literal_text, field_name, format_spec, conversion);
    # field_name is None or "" for pure-literal chunks, which are skipped here.
    keys = [field for _, field, _, _ in string.Formatter().parse(template) if field]
    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`; the exception type and message are unchanged.
    if any(k not in kwargs for k in keys):
        raise AssertionError(f"Required: {keys}, got: {sorted(kwargs)}")
    cur_keys = {k: kwargs[k] for k in keys}
    return template.format(**cur_keys)


# Define the YAML configuration
YAML = """
mg-{prompt_name}-{subject}:
id: mg-{prompt_name}-{subject}.dev.v0
Expand All @@ -27,7 +26,7 @@ def format(template: str, **kwargs: str) -> str:
modelgraded_spec: {modelgraded_spec}
""".strip()


# Define the prompts
unlabeled_prompts = dict(
humor=dict(
modelgraded_spec="humor_jp",
Expand Down Expand Up @@ -174,21 +173,24 @@ def format(template: str, **kwargs: str) -> str:
# remove duplicates
subjects = {k: list(set(v)) for k, v in subjects.items()}

# Define the target sets
unlabeled_target_sets = [
("humor", "people_jp"),
]

data_dir = f"{REGISTRY_PATH}/data/test_modelgraded"
# Define the data directory
data_dir = os.path.join(REGISTRY_PATH, "data/test_modelgraded")
yaml_str = f"# This file is generated by {os.path.basename(__file__)}\n\n"
evals = []

# Generate samples and write them to file
for prompt_name, subject in unlabeled_target_sets:
prompt = unlabeled_prompts[prompt_name]["prompt"]
samples = [{"input": format(prompt, subject=s)} for s in subjects[subject]]
file_name = f"{data_dir}/{prompt_name}_{subject}.jsonl"
file_name = os.path.join(data_dir, f"{prompt_name}_{subject}.jsonl")
# save samples jsonl
with open(file_name, "wb") as f:
for sample in samples:
# f.write(json.dumps(sample) + "\n")
json_data = json.dumps(sample, ensure_ascii=False)
f.write(json_data.encode("utf-8"))
f.write(b"\n")
Expand All @@ -204,10 +206,12 @@ def format(template: str, **kwargs: str) -> str:
)
evals += [f"mg-{prompt_name}-{subject}: {file_name}"]


yaml_file = f"{REGISTRY_PATH}/evals/test-modelgraded-generated.yaml"
# Write the YAML file
yaml_file = os.path.join(REGISTRY_PATH, "evals/test-modelgraded-generated.yaml")
with open(yaml_file, "w") as yf:
yf.write(yaml_str)
print(f"wrote {yaml_file}")

# Print the evals
for e in evals:
print(e)
print(e)
15 changes: 11 additions & 4 deletions scripts/pattern_identification_generator.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
"""
Usage: python scripts/pattern_identification_generator.py
This script generates examples for a task identification challenge,
with the task being to identify patterns between a set of symbols and their resulting labels.
"""

import os
import json
import random
from typing import Literal

# Ensure consistent results across runs
random.seed(42)

SYMBOLS = list("abcdefghijklmnopqrstuvwxyz")
Expand All @@ -15,15 +19,15 @@
)
TASK_NAME = "pattern_identification"


# This function generates an example symbol set and its corresponding label
def generate_example() -> tuple[str, list[str], Literal["foo", "bar"]]:
    """Draw one target symbol plus a random half of SYMBOLS and label the pair.

    Returns:
        A ``(target_symbol, symbol_list, label)`` tuple, where ``label`` is
        ``"foo"`` when the target symbol occurs in the sampled list and
        ``"bar"`` otherwise.
    """
    # Draw order (choice first, then sample) is kept so seeded runs
    # consume the random stream exactly as before.
    probe = random.choice(SYMBOLS)
    half = len(SYMBOLS) // 2
    drawn = random.sample(SYMBOLS, half)
    label: Literal["foo", "bar"]
    if probe in drawn:
        label = "foo"
    else:
        label = "bar"
    return probe, drawn, label


# This function generates a string of multiple examples, used to give a user multiple attempts to identify the pattern
def generate_exemplars_str(num_exemplars: int = 8) -> str:
exemplars = [generate_example() for _ in range(num_exemplars)]
exemplars_str = [
Expand All @@ -32,7 +36,7 @@ def generate_exemplars_str(num_exemplars: int = 8) -> str:
]
return "\n".join([INSTRUCTION] + exemplars_str)


# This function generates a set of evaluation examples and their corresponding labels
def generate_eval_examples(
num_eval_examples: int = 250,
) -> tuple[list[str], list[Literal["foo", "bar"]]]:
Expand All @@ -47,7 +51,10 @@ def generate_eval_examples(

if __name__ == "__main__":
eval_examples_str, targets = generate_eval_examples()
output_path = f"evals/registry/data/{TASK_NAME}/samples.v0.jsonl"

# Generate the output path in an OS-agnostic manner
output_path = os.path.join("evals", "registry", "data", TASK_NAME, "samples.v0.jsonl")

with open(output_path, "w") as writer:
for eval_example_str, target in zip(eval_examples_str, targets):
d = {
Expand Down

0 comments on commit ddd76bf

Please sign in to comment.