Skip to content

Commit 0206b8d

Browse files
committed
create llama2-7B lora finetune shell command; complete part of the Chatlogic framework
1 parent 4216668 commit 0206b8d

8 files changed

+66
-30
lines changed

Diff for: Baseline Experiment/Llama_2_7B_Finetune/Alpaca_data_processing.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,18 @@ def main():
2525
# load data
2626
dataset = load_dataset("qbao775/PARARULE-Plus")
2727

28+
# Define limits for each split
29+
limits = {
30+
"train": 10000,
31+
"validation": 3000,
32+
"test": 3000
33+
}
34+
2835
# Convert each data slice and save it separately
2936
for split, filename in zip(["train", "validation", "test"], ["train.json", "val.json", "test.json"]):
30-
transformed_data = transform_data(list(dataset[split]))
37+
# Slice the dataset
38+
data_slice = dataset[split]
39+
transformed_data = transform_data(list(data_slice))
3140
save_to_json(transformed_data, filename)
3241
print(f"Transformed and saved {split} data to {filename}.")
3342

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
git clone https://github.com/tloen/alpaca-lora.git
2+
# put the data "./Alpaca_PARARULE-Plus.json" into the directory same as the github repo
3+
python finetune.py \
4+
--base_model 'meta-llama/Llama-2-7b-hf' \
5+
--data_path './Alpaca_PARARULE-Plus.json' \
6+
--output_dir './lora-alpaca' \
7+
--batch_size 128 \
8+
--micro_batch_size 4 \
9+
--num_epochs 10 \
10+
--learning_rate 1e-4 \
11+
--cutoff_len 512 \
12+
--val_set_size 2000 \
13+
--lora_r 8 \
14+
--lora_alpha 16 \
15+
--lora_dropout 0.05 \
16+
--lora_target_modules '[q_proj,v_proj]' \
17+
--train_on_inputs \
18+
--group_by_length

Diff for: Baseline Experiment/Llama_2_7B_Finetune/get_data_PARARULE-Plus.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
data = json.load(f)
77

88
# Randomly sample 5000 entries
9-
sampled_data = random.sample(data, 5000)
9+
sampled_data = random.sample(data, 10000)
1010

1111
# Save the sampled data
1212
with open('Alpaca_PARARULE-Plus.json', 'w', encoding='utf-8') as f:

Diff for: call_openai_API.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
11
import openai
2+
import os
3+
4+
openai.api_key = api_key = os.getenv("OPENAI_API_KEY")
5+
26

37
def ai_function_generation(demo, context, question, requirements, model = "gpt-3.5-turbo"):
48
# parse args to comma separated string
@@ -18,7 +22,7 @@ def ai_function_generation(demo, context, question, requirements, model = "gpt-3
1822
def ai_generation_adjustment(demo, code, error_message, model = "gpt-3.5-turbo"):
1923
# parse args to comma separated string
2024
messages = [{"role": "user",
21-
"content": f"{demo}\n Here is the original code: ```{code}```\n And the exception that was thrown is: ```{error_message}```"}]
25+
"content": f"{demo}\n Here is the original code: ```{code}```\n And the exception that was thrown is: ```{error_message}```"}]
2226

2327
response = openai.ChatCompletion.create(
2428
model=model,
@@ -31,7 +35,7 @@ def ai_generation_adjustment(demo, code, error_message, model = "gpt-3.5-turbo")
3135
def ai_generation_check(demo, question, model = "gpt-3.5-turbo"):
3236
# parse args to comma separated string
3337
messages = [{"role": "user",
34-
"content": f"{demo}\n The sentence you are expected to decide is: ```{question}```"}]
38+
"content": f"{demo}\n The sentence you are expected to decide is: ```{question}```"}]
3539

3640
response = openai.ChatCompletion.create(
3741
model=model,

Diff for: complete_reasoning.py renamed to complete_reasoning_3.5.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@
22
import call_openai_API
33
import templates
44
import openai
5-
import openai_API_keys
65
import subprocess
76
import csv
7+
import os
88

99
# Initialize the OpenAI API client
10-
openai.api_key = openai_API_keys.OPENAI_API_KEY
10+
openai.api_key = api_key = os.getenv("OPENAI_API_KEY")
1111
#Define the file name
1212
JSON_filename = 'PARARULE_plus_step2_People_sample.json'
1313
PY_filename = 'pyDatalog_processing.py'
@@ -36,19 +36,19 @@ def check_pos_neg(string):
3636
return None
3737

3838
def Judgement(demo, question, model):
39-
result_string = call_openai_API.ai_generation_check(demo, question, model)
39+
result_string = call_openai_API.ai_generation_check(demo, question, model = "gpt-3.5-turbo")
4040
return result_string
4141

4242

4343
# Complete Communication with ChatGPT
44-
def Generation(demo, context, question, requirements, model):
44+
def Generation(demo, context, question, requirements, model = "gpt-3.5-turbo"):
4545

4646
result_string = call_openai_API.ai_function_generation(demo, context, question, requirements, model)
4747
return result_string
4848

4949
# Communication(templates.templates["agent_engineer"], PARARULE_Plus.PARARULE_Plus_dataset['train'][200]['context'], PARARULE_Plus.PARARULE_Plus_dataset['train'][200]['question'], templates.templates["no_extra_content"], "gpt-3.5-turbo")
5050

51-
def Adjustment(demo, code, error_message, model):
51+
def Adjustment(demo, code, error_message, model = "gpt-3.5-turbo"):
5252

5353
result_string = call_openai_API.ai_generation_adjustment(demo, code, error_message, model)
5454
return result_string
@@ -79,12 +79,12 @@ def write_record(filename, id, value, code, step, flag):
7979
with open(JSON_filename, 'r') as file:
8080
data = json.load(file)
8181

82-
correct_num = 0
82+
correct_num = 10
8383
for i in range(0, 1):
8484
try:
8585
result_string = extract_string(Generation(templates.templates["agent_engineer"], data[i]['context'],
8686
data[i]['question'],
87-
templates.templates["no_extra_content"], "gpt-3.5-turbo"))
87+
templates.templates["no_extra_content"]))
8888
print(result_string)
8989
with open(PY_filename, 'w') as file:
9090
file.write("{}".format(result_string))
@@ -93,7 +93,7 @@ def write_record(filename, id, value, code, step, flag):
9393
flag = 0
9494
while(output.strip() != '1' and output.strip() != '0'):
9595
result_string = extract_string(Adjustment(templates.templates["adjustment_agent"],
96-
result_string, output, "gpt-3.5-turbo"))
96+
result_string, output))
9797
with open(PY_filename, 'w') as file:
9898
file.write("{}".format(result_string))
9999
print("reprocessing...")

Diff for: openai_API_keys.py

-1
This file was deleted.

Diff for: pyDatalog_processing.py

+22-16
Original file line numberDiff line numberDiff line change
@@ -2,33 +2,39 @@
22
from pyDatalog import pyDatalog
33
try:
44
# Declare the pyDatalog variables
5-
pyDatalog.create_terms('X, strong, big, thin, short, smart, rough, bad, huge, poor, quiet, wealthy, dull, nice, sad, kind')
5+
pyDatalog.create_terms('X, strong, huge, big, short, little, quiet, wealthy, smart, dull, rough, sad, thin, bad, kind, nice, poor, small')
66

77
# Define the facts
88
+strong('Dave')
9+
+huge('Dave')
910
+big('Dave')
10-
+thin('Charlie')
11-
+short('Charlie')
12-
+smart('Anne')
13-
+rough('Alan')
14-
+bad('Alan')
11+
+short('Gary')
12+
+little('Gary')
13+
+quiet('Bob')
14+
+wealthy('Bob')
15+
+smart('Bob')
16+
+dull('Harry')
17+
+rough('Harry')
18+
+sad('Harry')
1519

1620
# Define the rules
17-
rough(X) <= ~huge(X)
18-
quiet(X) <= ~poor(X)
19-
wealthy(X) <= smart(X)
20-
nice(X) <= wealthy(X) & ~dull(X)
21-
sad(X) <= rough(X) & ~huge(X)
22-
dull(X) <= thin(X) & short(X)
23-
bad(X) <= dull(X) & ~wealthy(X)
24-
kind(X) <= quiet(X)
21+
quiet(X) <= strong(X)
22+
thin(X) <= short(X) & little(X)
23+
bad(X) <= dull(X) & rough(X)
24+
kind(X) <= quiet(X) & wealthy(X)
25+
26+
# Define the constraints
27+
small(X) <= thin(X)
28+
wealthy(X) <= quiet(X)
29+
nice(X) <= kind(X)
30+
poor(X) <= bad(X)
2531

2632
# Query the knowledge base
27-
result = ~kind('Alan')
33+
result = ~small('Gary')
2834
if result:
2935
print(1)
3036
else:
3137
print(0)
3238
except Exception as e:
3339
traceback_info = traceback.format_exc()
34-
print(traceback_info)
40+
print(traceback_info)

Diff for: sample_extraction.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
with open(input_file, "r") as json_file:
1717
json_data_list = json.load(json_file)
1818

19-
random_sample = random.sample(json_data_list, 20)
19+
random_sample = random.sample(json_data_list, 40)
2020

2121
with open(output_file, "w") as output_file:
2222
json.dump(random_sample, output_file)

0 commit comments

Comments
 (0)