Skip to content

Commit 5393c32

Browse files
author
Wzs010429
committed
add baseline
1 parent 84945dc commit 5393c32

File tree

9 files changed

+242
-30
lines changed

9 files changed

+242
-30
lines changed

Baseline Experiment/Zero-shot CoT/GPT-4.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import re
55
import os
66

7-
def ai_function_generation(demo, context, question, model = "gpt-3.5-turbo"):
7+
def ai_function_generation(demo, context, question, model = "gpt-4"):
88
# parse args to comma separated string
99
messages = [{"role": "system",
1010
"content": demo},
@@ -19,7 +19,7 @@ def ai_function_generation(demo, context, question, model = "gpt-3.5-turbo"):
1919

2020
return response.choices[0].message["content"]
2121

22-
def ai_function_cot_part2(demo, context, model = "gpt-3.5-turbo"):
22+
def ai_function_cot_part2(demo, context, model = "gpt-4"):
2323
# parse args to comma separated string
2424
messages = [{"role": "system",
2525
"content": demo},

CONCEPTRULESV1/baseline_Llama2-7B.py

+67
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
from transformers import AutoTokenizer, pipeline
2+
import transformers
3+
import torch
4+
import re
5+
import json
6+
import csv
7+
import random
8+
9+
# Model identifier handed to both the tokenizer loader and the text-generation pipeline in this script.
model = "meta-llama/Llama-2-7b-chat-hf"
10+
11+
def remove_spaces(text: str) -> str:
    """Normalise the spacing of a (possibly multi-line) string.

    Runs of spaces are collapsed to a single space, then the leading and
    trailing spaces of every line are dropped. Only the space character is
    touched; tabs and newlines are left as they are.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string.
    """
    # Replace multiple spaces with a single space
    text = re.sub(r' +', ' ', text)
    # Remove leading and trailing spaces from each line
    # (re.MULTILINE makes ^ and $ match at every line boundary).
    text = re.sub(r'^ +| +$', '', text, flags=re.MULTILINE)
    return text
17+
18+
# Prompt templates keyed by experiment name. remove_spaces() strips the
# indentation the triple-quoted literal picks up from the source layout, so
# the text sent to the model starts each line at column 0.
template = {
    "ConceptRules_baseline": remove_spaces("""
        Please help me complete a multi-step logical reasoning task.
        Please help me answer whether the question is correct or not based on the facts and rules formed by these natural language propositions.
        You should just return me one number as the final answer (1 for true and 0 for wrong) and also provide reasoning process.
        The Propositions and Questions are as follows: \n"""),
}
25+
26+
# Load the tokenizer and build the text-generation pipeline once, at import
# time, so every prompt reuses the same loaded model.
# NOTE(review): the name `pipeline` rebinds the `pipeline` function imported
# from transformers at the top of this file. It is kept as-is because
# batch_process() looks the generator up under this name.
tokenizer = AutoTokenizer.from_pretrained(model)
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    torch_dtype=torch.float16,
    device_map="auto",
)
33+
34+
def batch_process(text, *, max_length=2048, return_full_text=True):
    """Generate one sampled completion for ``text`` with the module-level pipeline.

    Args:
        text: Prompt string handed to the text-generation pipeline.
        max_length: Forwarded to the pipeline as ``max_length``. The default
            keeps the value this script has always used.
        return_full_text: Forwarded to the pipeline. The default ``True``
            keeps the existing behaviour; pass ``False`` to request only the
            newly generated text (see the Transformers pipeline docs).

    Returns:
        The ``generated_text`` field of the first returned sequence.
    """
    sequences = pipeline(
        text,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=max_length,
        return_full_text=return_full_text,
    )
    # num_return_sequences=1, so only the first entry is read.
    return sequences[0]['generated_text']
44+
45+
# NOTE(review): this script sits under CONCEPTRULESV1 but reads the
# ConceptRulesV2 training split - confirm that this path is intended.
jsonl_file = "ConceptRulesV2/conceptrules_v2_full_train.jsonl"

# First read all entries into a list. Blank lines are skipped because
# json.loads() raises on an empty document.
with open(jsonl_file, "r", encoding="utf-8") as file:
    all_entries = [json.loads(line) for line in file if line.strip()]

# Randomly select 100 entries, or every entry when fewer are available;
# random.sample() raises ValueError if asked for more items than exist.
selected_entries = random.sample(all_entries, min(100, len(all_entries)))

# Process the selected entries
with open("Llama2-7B.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["question_id", "response", "label"])  # Write header

    for entry in selected_entries:
        context = entry["context"]
        first_question = entry["questions"][0]  # Process only the first question
        question_text = first_question["text"]
        label = first_question["label"]
        responses = batch_process(f"Instructions: ```{template['ConceptRules_baseline']}```Propositions: ```{context}```\nQuestion: ```{question_text}```")
        csv_writer.writerow([first_question["id"], responses, label])

CONCEPTRULESV1/zero-shot_CoT_3.5.py

+77
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
import openai
2+
import json
3+
import csv
4+
import re
5+
import os
6+
import random
7+
8+
def ai_function_generation(demo, context, question, model="gpt-3.5-turbo"):
    """Send the propositions and the question to the chat model.

    Args:
        demo: Instruction text sent as the system message.
        context: Propositions placed in the user message.
        question: Question placed after the propositions in the user message.
        model: Model name passed to the chat-completion call.

    Returns:
        The message content of the first returned choice.
    """
    messages = [
        {"role": "system", "content": demo},
        {"role": "user", "content": f"Propositions: ```{context}```\nQuestion: ```{question}```"},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version before upgrading.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )

    return response.choices[0].message["content"]
19+
20+
def ai_function_cot_part2(demo, context, model="gpt-3.5-turbo"):
    """Send a previously produced text back to the chat model for a follow-up.

    Args:
        demo: Instruction text sent as the system message.
        context: Text wrapped in triple backticks and sent as the user message.
        model: Model name passed to the chat-completion call.

    Returns:
        The message content of the first returned choice.
    """
    messages = [
        {"role": "system", "content": demo},
        {"role": "user", "content": f"```{context}```"},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version before upgrading.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )

    return response.choices[0].message["content"]
31+
32+
def remove_spaces(text: str) -> str:
    """Collapse repeated spaces and trim spaces at both ends of every line.

    Only the space character is affected; tabs and newlines are preserved.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string.
    """
    # Squeeze every run of spaces down to one space.
    text = re.sub(r' +', ' ', text)
    # With re.MULTILINE, ^ and $ anchor at each line, so this trims per line.
    text = re.sub(r'^ +| +$', '', text, flags=re.MULTILINE)
    return text
36+
37+
# System prompts for the two-step zero-shot chain-of-thought flow:
# part1 asks for step-by-step reasoning, part2 asks for a single 1/0 answer.
# remove_spaces() strips the indentation the literals pick up from the layout.
template = {
    "zero-shot-CoT-part1": remove_spaces("""
        Please help me complete this multi-step logical reasoning task.
        Answer whether this question is correct based on the propositions about facts and rules formed by these natural language propositions.
        You should think through the question step by step, and show your full process. \n"""),
    "zero-shot-CoT-part2": remove_spaces("""
        Based on this thought process, please help me sum up only a number as the final answer (1 represents correct, 0 represents wrong)."""),
}
45+
46+
# Read the API key from the OPENAI_API_KEY environment variable (os.getenv returns None when it is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
47+
48+
def ZeroShotCoT_call1(demo, context, question, model="gpt-3.5-turbo"):
    """Run the first chain-of-thought call.

    Thin wrapper that forwards all arguments to ai_function_generation()
    and returns its result unchanged.
    """
    return ai_function_generation(demo, context, question, model)
50+
51+
def ZeroShotCoT_call2(demo, context, model="gpt-3.5-turbo"):
    """Run the second chain-of-thought call.

    Thin wrapper that forwards all arguments to ai_function_cot_part2()
    and returns its result unchanged.
    """
    return ai_function_cot_part2(demo, context, model)
53+
54+
jsonl_file = "ConceptRules/conceptrules_full_train.jsonl"

# Load every JSONL record. Blank lines are skipped because json.loads()
# raises on an empty document.
with open(jsonl_file, "r", encoding="utf-8") as file:
    all_entries = [json.loads(line) for line in file if line.strip()]

# Randomly select 100 entries from the loaded data, or every entry when fewer
# are available; random.sample() raises ValueError if asked for more than exist.
selected_entries = random.sample(all_entries, min(100, len(all_entries)))

with open("V1-zeroshot-cot-3.5.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["id", "response", "label"])  # Write header

    for entry in selected_entries:
        context = entry["context"]
        first_question = entry["questions"][0]  # Only the first question of each entry is evaluated
        question_text = first_question["text"]
        label = first_question["label"]
        # Step 1: obtain the step-by-step reasoning.
        response_part_1 = ZeroShotCoT_call1(template['zero-shot-CoT-part1'], context, question_text)
        print(response_part_1)
        # Step 2: feed that reasoning back and ask for the final 1/0 answer.
        response_part_2 = ZeroShotCoT_call2(template['zero-shot-CoT-part2'], response_part_1)
        print("Writing to CSV:", [first_question["id"], response_part_2, label])
        csv_writer.writerow([first_question["id"], response_part_2, label])

CONCEPTRULESV1/zero-shot_CoT_4.py

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import openai
2+
import json
3+
import csv
4+
import re
5+
import os
6+
import random
7+
8+
def ai_function_generation(demo, context, question, model="gpt-4-1106-preview"):
    """Send the propositions and the question to the chat model.

    Args:
        demo: Instruction text sent as the system message.
        context: Propositions placed in the user message.
        question: Question placed after the propositions in the user message.
        model: Model name passed to the chat-completion call.

    Returns:
        The message content of the first returned choice.
    """
    messages = [
        {"role": "system", "content": demo},
        {"role": "user", "content": f"Propositions: ```{context}```\nQuestion: ```{question}```"},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version before upgrading.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )

    return response.choices[0].message["content"]
19+
20+
def ai_function_cot_part2(demo, context, model="gpt-4-1106-preview"):
    """Send a previously produced text back to the chat model for a follow-up.

    Args:
        demo: Instruction text sent as the system message.
        context: Text wrapped in triple backticks and sent as the user message.
        model: Model name passed to the chat-completion call.

    Returns:
        The message content of the first returned choice.
    """
    messages = [
        {"role": "system", "content": demo},
        {"role": "user", "content": f"```{context}```"},
    ]

    # NOTE(review): openai.ChatCompletion looks like the pre-1.0 SDK
    # interface - confirm the pinned openai version before upgrading.
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0,
    )

    return response.choices[0].message["content"]
31+
32+
def remove_spaces(text: str) -> str:
    """Tidy the spacing of a string, line by line.

    First every run of spaces becomes one space; then spaces at the start and
    end of each line are removed. Tabs and newlines are not modified.

    Args:
        text: The string to clean up.

    Returns:
        The cleaned string.
    """
    # Collapse runs of spaces.
    text = re.sub(r' +', ' ', text)
    # Trim each line; re.MULTILINE lets ^ and $ match at every line boundary.
    text = re.sub(r'^ +| +$', '', text, flags=re.MULTILINE)
    return text
36+
37+
# System prompts for the two-step zero-shot chain-of-thought flow:
# part1 asks for step-by-step reasoning, part2 asks for a single 1/0 answer.
# remove_spaces() strips the indentation the literals pick up from the layout.
template = {
    "zero-shot-CoT-part1": remove_spaces("""
        Please help me complete this multi-step logical reasoning task.
        Answer whether this question is correct based on the propositions about facts and rules formed by these natural language propositions.
        You should think through the question step by step, and show your full process. \n"""),
    "zero-shot-CoT-part2": remove_spaces("""
        Based on this thought process, please help me sum up only a number as the final answer (1 represents correct, 0 represents wrong)."""),
}
45+
46+
# Read the API key from the OPENAI_API_KEY environment variable (os.getenv returns None when it is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
47+
48+
def ZeroShotCoT_call1(demo, context, question, model="gpt-4-1106-preview"):
    """Run the first chain-of-thought call.

    Thin wrapper that forwards all arguments to ai_function_generation()
    and returns its result unchanged.
    """
    return ai_function_generation(demo, context, question, model)
50+
51+
def ZeroShotCoT_call2(demo, context, model="gpt-4-1106-preview"):
    """Run the second chain-of-thought call.

    Thin wrapper that forwards all arguments to ai_function_cot_part2()
    and returns its result unchanged.
    """
    return ai_function_cot_part2(demo, context, model)
53+
54+
jsonl_file = "ConceptRules/conceptrules_full_train.jsonl"

# Load every JSONL record. Blank lines are skipped because json.loads()
# raises on an empty document.
with open(jsonl_file, "r", encoding="utf-8") as file:
    all_entries = [json.loads(line) for line in file if line.strip()]

# Randomly select 100 entries from the loaded data, or every entry when fewer
# are available; random.sample() raises ValueError if asked for more than exist.
selected_entries = random.sample(all_entries, min(100, len(all_entries)))

with open("V1-zeroshot-cot-4.csv", "w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["id", "response", "label"])  # Write header

    for entry in selected_entries:
        context = entry["context"]
        first_question = entry["questions"][0]  # Only the first question of each entry is evaluated
        question_text = first_question["text"]
        label = first_question["label"]
        # Step 1: obtain the step-by-step reasoning.
        response_part_1 = ZeroShotCoT_call1(template['zero-shot-CoT-part1'], context, question_text)
        print(response_part_1)
        # Step 2: feed that reasoning back and ask for the final 1/0 answer.
        response_part_2 = ZeroShotCoT_call2(template['zero-shot-CoT-part2'], response_part_1)
        csv_writer.writerow([first_question["id"], response_part_2, label])

CONCEPTRULESV1/zero-shot_CoT_Llama2.py

Whitespace-only changes.

CONCEPTRULESV2/baseline_Llama2-7B.py

+20-28
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from transformers import AutoTokenizer
1+
from transformers import AutoTokenizer, pipeline
22
import transformers
33
import torch
44
import re
55
import json
66
import csv
7+
import random
78

89
model = "meta-llama/Llama-2-7b-chat-hf"
910

@@ -30,7 +31,6 @@ def remove_spaces(text):
3031
device_map="auto",
3132
)
3233

33-
3434
def batch_process(text):
3535
sequences = pipeline(
3636
text,
@@ -42,34 +42,26 @@ def batch_process(text):
4242
)
4343
return sequences[0]['generated_text']
4444

45+
jsonl_file = "ConceptRulesV2/conceptrules_v2_full_train.jsonl"
46+
47+
# First read all entries into a list
48+
all_entries = []
49+
with open(jsonl_file, "r", encoding="utf-8") as file:
50+
for line in file:
51+
all_entries.append(json.loads(line))
4552

46-
# List of json file names
47-
json_files = [
48-
"../PARARULE_plus_step2_Animal_sample.json",
49-
"../PARARULE_plus_step3_Animal_sample.json",
50-
"../PARARULE_plus_step4_Animal_sample.json",
51-
"../PARARULE_plus_step5_Animal_sample.json",
52-
"../PARARULE_plus_step2_People_sample.json",
53-
"../PARARULE_plus_step3_People_sample.json",
54-
"../PARARULE_plus_step4_People_sample.json",
55-
"../PARARULE_plus_step5_People_sample.json"
56-
]
53+
# Randomly select 100 entries
54+
selected_entries = random.sample(all_entries, 100)
5755

58-
# Open the CSV file for writing
56+
# Process the selected entries
5957
with open("Llama2-7B.csv", "w", newline="", encoding="utf-8") as csv_file:
6058
csv_writer = csv.writer(csv_file)
61-
csv_writer.writerow(["step", "return", "label"]) # Write header
62-
63-
for json_file in json_files:
64-
step = '_'.join(json_file.split("_")[2:4])
65-
with open(json_file, "r", encoding="utf-8") as f:
66-
data = json.load(f)
67-
for entry in data:
68-
context = entry["context"]
69-
question = entry["question"]
70-
label = entry["label"]
71-
# Replace this with your actual function call
72-
responses = batch_process(f"Instructions: ```{template['Llama2_baseline']}```Propositions: ```{context}```\nQuestion: ```{question}```")
73-
74-
csv_writer.writerow([step, responses, label])
59+
csv_writer.writerow(["question_id", "response", "label"]) # Write header
7560

61+
for entry in selected_entries:
62+
context = entry["context"]
63+
for question in entry["questions"]:
64+
question_text = question["text"]
65+
label = question["label"]
66+
responses = batch_process(f"Instructions: ```{template['ConceptRules_baseline']}```Propositions: ```{context}```\nQuestion: ```{question_text}```")
67+
csv_writer.writerow([question["id"], responses, label])

CONCEPTRULESV2/zero-shot_CoT_3.5.py

Whitespace-only changes.

CONCEPTRULESV2/zero-shot_CoT_4.py

Whitespace-only changes.

CONCEPTRULESV2/zero-shot_CoT_Llama2.py

Whitespace-only changes.

0 commit comments

Comments
 (0)