forked from lm-sys/FastChat — run_e2e_generation.py (94 lines / 65 loc, 3.59 KB)
import argparse
import json
import os
import random
import subprocess
import time

import aiobotocore.session
import pandas as pd
import requests
import s3fs
from datasets import Dataset, DatasetDict, load_from_disk, load_dataset
from requests.auth import HTTPBasicAuth
def parse_arguments():
    """Define and parse the command-line interface for this script.

    Returns:
        argparse.Namespace with `model`, `model_id_prefix`, and `openai_key`.
    """
    parser = argparse.ArgumentParser(description='Generate code using VLLM')
    parser.add_argument(
        '--model',
        type=str,
        default="codellama/CodeLlama-7b-Instruct-hf",
        help='Name of the hf model to be used for generation',
    )
    parser.add_argument(
        '--model_id_prefix',
        type=str,
        default="code-llama-7b-instruct",
        help='Name you want to save the model as. ',
    )
    parser.add_argument('--openai_key', type=str, default=None)
    # Retired options (kept for reference): --num_gpus, --tmp_generation_dir,
    # --s3_dataset_path, --generation_dir, --use_chat_template.
    return parser.parse_args()
def get_s3fs():
    """Return an s3fs filesystem bound to the "ml-worker" aiobotocore profile."""
    session = aiobotocore.session.AioSession(profile="ml-worker")
    return s3fs.S3FileSystem(session=session)
def save_ds_s3(ds, path: str):
    """Persist a `datasets` object to S3 at `path` using the shared filesystem."""
    s3 = get_s3fs()
    ds.save_to_disk(path, storage_options=s3.storage_options)
def load_ds_s3(path: str):
    """Load a `datasets` object previously saved to S3 at `path`."""
    s3 = get_s3fs()
    return load_from_disk(path, storage_options=s3.storage_options)
def main():
    """End-to-end generation driver.

    Loads the MT-Bench dataset from the local snapshot, runs one zero-shot
    baseline pass of gen_api_answer.py, then one one-shot pass per training
    example. Each example is dumped to a temporary JSON file that is handed
    to the subprocess and removed afterwards.
    """
    full_start_time = time.time()
    args = parse_arguments()

    # NOTE(review): despite the variable name, this reads a *local* snapshot,
    # not S3 — presumably synced to /tmp beforehand; confirm with the caller.
    s3_dataset = load_from_disk("/tmp/mt-bench-dataset/")
    train_ds = s3_dataset["train"]
    messages = train_ds["messages"]

    # Shared argv prefix for gen_api_answer.py. Built as a list and executed
    # with subprocess.run (shell=False) so values containing spaces or shell
    # metacharacters in --model/--openai_key cannot break or inject into the
    # command line (the previous os.system f-string could).
    base_cmd = [
        "python", "gen_api_answer.py",
        "--model", args.model,
        "--openai-api-base", "http://localhost:8000/v1",
        "--parallel", "50",
    ]
    if args.openai_key is not None:
        # Only forward the key when given; the old f-string passed the
        # literal string "None" when --openai_key was omitted.
        base_cmd += ["--openai_key", args.openai_key]

    # Zero-shot baseline pass (no one-shot example attached).
    print(f"The command is: {' '.join(base_cmd)}")
    rc = subprocess.run(base_cmd).returncode
    if rc != 0:
        # Best-effort like the original os.system call, but no longer silent.
        print(f"Warning: baseline generation exited with code {rc}")

    for idx, example in enumerate(messages):
        # Write the one-shot example to a temporary file for the subprocess.
        tmp_file = f"/tmp/example_{idx}.json"
        with open(tmp_file, 'w') as f:
            json.dump(example, f)
        try:
            cmd = base_cmd + ["--one_shot_example", tmp_file, "--index", str(idx)]
            print(f"The command is: {' '.join(cmd)}")
            rc = subprocess.run(cmd).returncode
            if rc != 0:
                print(f"Warning: generation for example {idx} exited with code {rc}")
        finally:
            # Always clean up the temp file, even if launching the subprocess raised.
            os.remove(tmp_file)

    full_end_time = time.time()
    print(f"Total time taken: {full_end_time - full_start_time}")
# Script entry point: run the full generation pipeline when invoked directly.
if __name__ == "__main__":
    main()