Strong-AI-Lab
diff --git a/‎Baseline Experiment/Zero-shot CoT/GPT-4.py
+2-2 b/‎Baseline Experiment/Zero-shot CoT/GPT-4.py
+2-2
diff --git a/‎CONCEPTRULESV1/baseline_Llama2-7B.py
+67 b/‎CONCEPTRULESV1/baseline_Llama2-7B.py
+67
diff --git a/‎CONCEPTRULESV1/zero-shot_CoT_3.5.py
+77 b/‎CONCEPTRULESV1/zero-shot_CoT_3.5.py
+77
diff --git a/‎CONCEPTRULESV1/zero-shot_CoT_4.py
+76 b/‎CONCEPTRULESV1/zero-shot_CoT_4.py
+76
diff --git a/‎CONCEPTRULESV1/zero-shot_CoT_Llama2.py b/‎CONCEPTRULESV1/zero-shot_CoT_Llama2.py
diff --git a/‎CONCEPTRULESV2/baseline_Llama2-7B.py
+20-28 b/‎CONCEPTRULESV2/baseline_Llama2-7B.py
+20-28
diff --git a/‎CONCEPTRULESV2/zero-shot_CoT_3.5.py b/‎CONCEPTRULESV2/zero-shot_CoT_3.5.py
diff --git a/‎CONCEPTRULESV2/zero-shot_CoT_4.py b/‎CONCEPTRULESV2/zero-shot_CoT_4.py
diff --git a/‎CONCEPTRULESV2/zero-shot_CoT_Llama2.py b/‎CONCEPTRULESV2/zero-shot_CoT_Llama2.py
@@ -4,7 +4,7 @@
 import re
 import os
 
-def ai_function_generation(demo, context, question, model = "gpt-3.5-turbo"):
+def ai_function_generation(demo, context, question, model = "gpt-4"):
     # parse args to comma separated string
     messages = [{"role": "system",
                 "content": demo},
@@ -19,7 +19,7 @@ def ai_function_generation(demo, context, question, model = "gpt-3.5-turbo"):
 
     return response.choices[0].message["content"]
 
-def ai_function_cot_part2(demo, context, model = "gpt-3.5-turbo"):
+def ai_function_cot_part2(demo, context, model = "gpt-4"):
     # parse args to comma separated string
     messages = [{"role": "system",
                 "content": demo},
 
@@ -0,0 +1,67 @@
+from transformers import AutoTokenizer, pipeline
+import transformers
+import torch
+import re
+import json
+import csv
+import random
+
+# Model identifier handed to both AutoTokenizer.from_pretrained and
+# transformers.pipeline further down in this script.
+model = "meta-llama/Llama-2-7b-chat-hf"
+
+def remove_spaces(text):
+    """Normalise the spacing of a (typically triple-quoted) prompt string.
+
+    Runs of spaces are collapsed into a single space, then any spaces left at
+    the start or end of each line are removed. Newlines are left untouched.
+    """
+    collapsed = re.sub(r' +', ' ', text)
+    # MULTILINE makes ^ and $ match at every line boundary, not only at the
+    # two ends of the whole string.
+    return re.sub(r'^ +| +$', '', collapsed, flags=re.MULTILINE)
+
+# Prompt templates keyed by experiment name. remove_spaces strips the
+# indentation the triple-quoted literal picks up from the source layout.
+template = {
+    "ConceptRules_baseline": remove_spaces("""
+    Please help me complete a multi-step logical reasoning task. 
+    Please help me answer whether the question is correct or not based on the facts and rules formed by these natural language propositions. 
+    You should just return me one number as the final answer (1 for true and 0 for wrong) and also provide reasoning process. 
+    The Propositions and Questions are as follows: \n""")
+}
+
+# Tokenizer for the same model; batch_process reads its eos_token_id.
+tokenizer = AutoTokenizer.from_pretrained(model)
+# NOTE: this assignment rebinds the name `pipeline`, replacing the function
+# imported from transformers at the top of the file with the constructed
+# text-generation pipeline object that batch_process calls.
+pipeline = transformers.pipeline(
+    "text-generation",
+    model=model,
+    torch_dtype=torch.float16,
+    device_map="auto",
+)
+
+def batch_process(text):
+    """Generate one sampled completion for `text` with the module-level pipeline.
+
+    Returns the 'generated_text' field of the first returned sequence.
+    """
+    # NOTE(review): 'generated_text' presumably still contains the prompt
+    # itself (the usual text-generation pipeline default) -- confirm before
+    # parsing the 1/0 answer out of the CSV.
+    generation_kwargs = dict(
+        do_sample=True,
+        top_k=10,
+        num_return_sequences=1,
+        eos_token_id=tokenizer.eos_token_id,
+        max_length=2048,
+    )
+    outputs = pipeline(text, **generation_kwargs)
+    return outputs[0]['generated_text']
+
+# This script lives under CONCEPTRULESV1, so it reads the V1 training split
+# (the same file the V1 zero-shot CoT scripts use) instead of the V2 file.
+jsonl_file = "ConceptRules/conceptrules_full_train.jsonl"
+
+# First read all entries into a list. Blank lines (such as a trailing newline
+# at the end of the file) are skipped because json.loads rejects "".
+all_entries = []
+with open(jsonl_file, "r", encoding="utf-8") as file:
+    for line in file:
+        if line.strip():
+            all_entries.append(json.loads(line))
+
+# Randomly select up to 100 entries. The count is clamped to the population
+# size because random.sample raises ValueError when asked for more items
+# than are available.
+# NOTE(review): the sample is unseeded, so each run evaluates a different
+# subset -- seed the RNG if results must be reproducible.
+selected_entries = random.sample(all_entries, min(100, len(all_entries)))
+
+# Process the selected entries
+with open("Llama2-7B.csv", "w", newline="", encoding="utf-8") as csv_file:
+    csv_writer = csv.writer(csv_file)
+    csv_writer.writerow(["question_id", "response", "label"])  # Write header
+
+    for entry in selected_entries:
+        context = entry["context"]
+        first_question = entry["questions"][0]  # Process only the first question
+        question_text = first_question["text"]
+        label = first_question["label"]
+        responses = batch_process(f"Instructions: ```{template['ConceptRules_baseline']}```Propositions: ```{context}```\nQuestion: ```{question_text}```")
+        csv_writer.writerow([first_question["id"], responses, label])
@@ -0,0 +1,77 @@
+import openai
+import json
+import csv
+import re
+import os
+import random
+
+def ai_function_generation(demo, context, question, model="gpt-3.5-turbo"):
+    """Ask the chat model to answer `question` about `context`.
+
+    `demo` is sent as the system message; the user message carries the
+    propositions and the question, each wrapped in triple backticks.
+    Returns the text content of the first choice in the response.
+    """
+    system_turn = {"role": "system", "content": demo}
+    user_turn = {"role": "user", "content": f"Propositions: ```{context}```\nQuestion: ```{question}```"}
+
+    # temperature=0 is passed on every call in this script.
+    completion = openai.ChatCompletion.create(
+        model=model,
+        messages=[system_turn, user_turn],
+        temperature=0,
+    )
+    first_choice = completion.choices[0]
+    return first_choice.message["content"]
+
+def ai_function_cot_part2(demo, context, model="gpt-3.5-turbo"):
+    """Second chain-of-thought call: send `context` back to the chat model.
+
+    `demo` is the system message and `context` (wrapped in triple backticks)
+    is the only user message. Returns the text content of the first choice.
+    """
+    conversation = [
+        {"role": "system", "content": demo},
+        {"role": "user", "content": f"```{context}```"},
+    ]
+    reply = openai.ChatCompletion.create(model=model, messages=conversation, temperature=0)
+    return reply.choices[0].message["content"]
+
+def remove_spaces(text):
+    """Collapse runs of spaces and trim spaces at both ends of every line."""
+    single_spaced = re.sub(r' +', ' ', text)
+    # MULTILINE lets ^ and $ anchor at each line, so indentation is removed
+    # from every line of a multi-line prompt.
+    return re.sub(r'^ +| +$', '', single_spaced, flags=re.MULTILINE)
+
+# System prompts for the two-call zero-shot chain-of-thought flow.
+# remove_spaces strips the indentation picked up from the source layout.
+template = {
+    # Call 1: ask for step-by-step reasoning over the propositions.
+    "zero-shot-CoT-part1": remove_spaces("""
+    Please help me complete this multi-step logical reasoning task. 
+    Answer whether this question is correct based on the propositions about facts and rules formed by these natural language propositions. 
+    You should think through the question step by step, and show your full process. \n"""),
+    # Call 2: ask for the reasoning to be condensed into a single 1/0 answer.
+    "zero-shot-CoT-part2": remove_spaces("""
+    Based on this thought process, please help me sum up only a number as the final answer (1 represents correct, 0 represents wrong).""")
+}
+
+# The API key comes from the environment; os.getenv returns None when
+# OPENAI_API_KEY is not set.
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+def ZeroShotCoT_call1(demo, context, question, model="gpt-3.5-turbo"):
+    """First chain-of-thought call; delegates to ai_function_generation."""
+    reasoning = ai_function_generation(demo, context, question, model=model)
+    return reasoning
+
+def ZeroShotCoT_call2(demo, context, model="gpt-3.5-turbo"):
+    """Second chain-of-thought call; delegates to ai_function_cot_part2."""
+    final_answer = ai_function_cot_part2(demo, context, model=model)
+    return final_answer
+
+jsonl_file = "ConceptRules/conceptrules_full_train.jsonl"
+
+# Load every JSONL record. Blank lines (such as a trailing newline at the end
+# of the file) are skipped because json.loads rejects an empty string.
+all_entries = []
+with open(jsonl_file, "r", encoding="utf-8") as file:
+    for line in file:
+        if line.strip():
+            all_entries.append(json.loads(line))
+
+# Randomly select up to 100 entries from the loaded data. The count is
+# clamped to the population size because random.sample raises ValueError
+# when asked for more items than are available.
+# NOTE(review): the sample is unseeded, so every run (and every model script)
+# draws a different subset -- seed the RNG if results must be comparable.
+selected_entries = random.sample(all_entries, min(100, len(all_entries)))
+
+with open("V1-zeroshot-cot-3.5.csv", "w", newline="", encoding="utf-8") as csv_file:
+    csv_writer = csv.writer(csv_file)
+    csv_writer.writerow(["id", "response", "label"])  # Write header
+
+    for entry in selected_entries:
+        context = entry["context"]
+        first_question = entry["questions"][0]  # Only the first question of each entry is evaluated
+        question_text = first_question["text"]
+        label = first_question["label"]
+        # Call 1 elicits the step-by-step reasoning; call 2 condenses that
+        # reasoning into the final 1/0 answer that is written to the CSV.
+        response_part_1 = ZeroShotCoT_call1(template['zero-shot-CoT-part1'], context, question_text)
+        print(response_part_1)
+        response_part_2 = ZeroShotCoT_call2(template['zero-shot-CoT-part2'], response_part_1)
+        print("Writing to CSV:", [first_question["id"], response_part_2, label])
+        csv_writer.writerow([first_question["id"], response_part_2, label])
@@ -0,0 +1,76 @@
+import openai
+import json
+import csv
+import re
+import os
+import random
+
+def ai_function_generation(demo, context, question, model="gpt-4-1106-preview"):
+    """Ask the chat model to answer `question` about `context`.
+
+    `demo` is sent as the system message; the user message carries the
+    propositions and the question, each wrapped in triple backticks.
+    Returns the text content of the first choice in the response.
+    """
+    user_prompt = f"Propositions: ```{context}```\nQuestion: ```{question}```"
+    chat = [
+        {"role": "system", "content": demo},
+        {"role": "user", "content": user_prompt},
+    ]
+    # temperature=0 is passed on every call in this script.
+    result = openai.ChatCompletion.create(model=model, messages=chat, temperature=0)
+    return result.choices[0].message["content"]
+
+def ai_function_cot_part2(demo, context, model="gpt-4-1106-preview"):
+    """Second chain-of-thought call: send `context` back to the chat model.
+
+    `demo` is the system message and `context` (wrapped in triple backticks)
+    is the only user message. Returns the text content of the first choice.
+    """
+    system_turn = {"role": "system", "content": demo}
+    user_turn = {"role": "user", "content": f"```{context}```"}
+    result = openai.ChatCompletion.create(
+        model=model,
+        messages=[system_turn, user_turn],
+        temperature=0,
+    )
+    top_choice = result.choices[0]
+    return top_choice.message["content"]
+
+def remove_spaces(text):
+    """Collapse runs of spaces and trim spaces at both ends of every line."""
+    squeezed = re.sub(r' +', ' ', text)
+    # With MULTILINE, ^ and $ anchor at each line rather than only at the
+    # ends of the whole string.
+    trimmed = re.sub(r'^ +| +$', '', squeezed, flags=re.MULTILINE)
+    return trimmed
+
+# System prompts for the two-call zero-shot chain-of-thought flow.
+# remove_spaces strips the indentation picked up from the source layout.
+template = {
+    # Call 1: ask for step-by-step reasoning over the propositions.
+    "zero-shot-CoT-part1": remove_spaces("""
+    Please help me complete this multi-step logical reasoning task. 
+    Answer whether this question is correct based on the propositions about facts and rules formed by these natural language propositions. 
+    You should think through the question step by step, and show your full process. \n"""),
+    # Call 2: ask for the reasoning to be condensed into a single 1/0 answer.
+    "zero-shot-CoT-part2": remove_spaces("""
+    Based on this thought process, please help me sum up only a number as the final answer (1 represents correct, 0 represents wrong).""")
+}
+
+# The API key comes from the environment; os.getenv returns None when
+# OPENAI_API_KEY is not set.
+openai.api_key = os.getenv("OPENAI_API_KEY")
+
+def ZeroShotCoT_call1(demo, context, question, model="gpt-4-1106-preview"):
+    """First chain-of-thought call; delegates to ai_function_generation."""
+    reasoning = ai_function_generation(demo, context, question, model=model)
+    return reasoning
+
+def ZeroShotCoT_call2(demo, context, model="gpt-4-1106-preview"):
+    """Second chain-of-thought call; delegates to ai_function_cot_part2."""
+    final_answer = ai_function_cot_part2(demo, context, model=model)
+    return final_answer
+
+jsonl_file = "ConceptRules/conceptrules_full_train.jsonl"
+
+# Load every JSONL record. Blank lines (such as a trailing newline at the end
+# of the file) are skipped because json.loads rejects an empty string.
+all_entries = []
+with open(jsonl_file, "r", encoding="utf-8") as file:
+    for line in file:
+        if line.strip():
+            all_entries.append(json.loads(line))
+
+# Randomly select up to 100 entries from the loaded data. The count is
+# clamped to the population size because random.sample raises ValueError
+# when asked for more items than are available.
+# NOTE(review): the sample is unseeded, so every run (and every model script)
+# draws a different subset -- seed the RNG if results must be comparable.
+selected_entries = random.sample(all_entries, min(100, len(all_entries)))
+
+with open("V1-zeroshot-cot-4.csv", "w", newline="", encoding="utf-8") as csv_file:
+    csv_writer = csv.writer(csv_file)
+    csv_writer.writerow(["id", "response", "label"])  # Write header
+
+    for entry in selected_entries:
+        context = entry["context"]
+        first_question = entry["questions"][0]  # Only the first question of each entry is evaluated
+        question_text = first_question["text"]
+        label = first_question["label"]
+        # Call 1 elicits the step-by-step reasoning; call 2 condenses that
+        # reasoning into the final 1/0 answer that is written to the CSV.
+        response_part_1 = ZeroShotCoT_call1(template['zero-shot-CoT-part1'], context, question_text)
+        print(response_part_1)
+        response_part_2 = ZeroShotCoT_call2(template['zero-shot-CoT-part2'], response_part_1)
+        csv_writer.writerow([first_question["id"], response_part_2, label])
@@ -1,9 +1,10 @@
-from transformers import AutoTokenizer
+from transformers import AutoTokenizer, pipeline
 import transformers
 import torch
 import re
 import json
 import csv
+import random
 
 model = "meta-llama/Llama-2-7b-chat-hf"
 
@@ -30,7 +31,6 @@ def remove_spaces(text):
     device_map="auto",
 )
 
-
 def batch_process(text):
     sequences = pipeline(
         text,
@@ -42,34 +42,26 @@ def batch_process(text):
     )
     return sequences[0]['generated_text']
 
+jsonl_file = "ConceptRulesV2/conceptrules_v2_full_train.jsonl"
+
+# First read all entries into a list
+all_entries = []
+with open(jsonl_file, "r", encoding="utf-8") as file:
+    for line in file:
+        all_entries.append(json.loads(line))
 
-# List of json file names
-json_files = [
-    "../PARARULE_plus_step2_Animal_sample.json",
-    "../PARARULE_plus_step3_Animal_sample.json",
-    "../PARARULE_plus_step4_Animal_sample.json",
-    "../PARARULE_plus_step5_Animal_sample.json",
-    "../PARARULE_plus_step2_People_sample.json",
-    "../PARARULE_plus_step3_People_sample.json",
-    "../PARARULE_plus_step4_People_sample.json",
-    "../PARARULE_plus_step5_People_sample.json"
-]
+# Randomly select 100 entries
+selected_entries = random.sample(all_entries, 100)
 
-# Open the CSV file for writing
+# Process the selected entries
 with open("Llama2-7B.csv", "w", newline="", encoding="utf-8") as csv_file:
     csv_writer = csv.writer(csv_file)
-    csv_writer.writerow(["step", "return", "label"])  # Write header
-
-    for json_file in json_files:
-        step = '_'.join(json_file.split("_")[2:4])
-        with open(json_file, "r", encoding="utf-8") as f:
-            data = json.load(f)
-            for entry in data:
-                context = entry["context"]
-                question = entry["question"]
-                label = entry["label"]
-                # Replace this with your actual function call
-                responses = batch_process(f"Instructions: ```{template['Llama2_baseline']}```Propositions: ```{context}```\nQuestion: ```{question}```")
-
-                csv_writer.writerow([step, responses, label])
+    csv_writer.writerow(["question_id", "response", "label"])  # Write header
 
+    for entry in selected_entries:
+        context = entry["context"]
+        for question in entry["questions"]:
+            question_text = question["text"]
+            label = question["label"]
+            responses = batch_process(f"Instructions: ```{template['ConceptRules_baseline']}```Propositions: ```{context}```\nQuestion: ```{question_text}```")
+            csv_writer.writerow([question["id"], responses, label])