-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathacc_v_entropy.py
140 lines (107 loc) · 4.98 KB
/
acc_v_entropy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import pandas as pd
from tqdm import tqdm
import json
import random
import torch.nn.functional as F
import os
from semantic_uncertainty.calc_entropy import get_entropy_from_probabilities
from question_loader import *
import pickle
from utils import *
from LLM import get_next_token_fast
import numpy as np
##### SETTINGS #####
# Root directory for downloaded model files.
cache_dir = '/tmp'
# Hugging Face identifier of the model under evaluation.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"
# Answer letters a multiple-choice response may use.
possible_outputs = ["A", "B", "C", "D", "E", "F", "G", "H"]
batch_size = 8
# When True, any cached copy of the model is deleted before the run.
redownload = False
# Destination handed to dump_data() for the per-question results.
data_outpath = './data/all_entropies'
######################


def _remove_cached_model(path):
    """Delete the directory tree at *path*; do nothing if it does not exist.

    ``os.rmdir`` only removes *empty* directories, so it raises ``OSError``
    on a populated model cache. ``shutil.rmtree`` removes the directory
    together with everything inside it.
    """
    import shutil  # local import keeps this block self-contained

    if os.path.exists(path):
        shutil.rmtree(path)


if redownload:
    # NOTE(review): assumes the model is cached at <cache_dir>/<model_name>;
    # confirm against the cache layout the model loader actually uses.
    model_cache_path = os.path.join(cache_dir, model_name)
    _remove_cached_model(model_cache_path)
# Turning into classifier
# Use longer responses, see which one is better --> logit bias or entailment w/out logit bias
# average log prob over sentence normalized
# Semantic entropy, [Prob of A, Prob of B...], [Diff of A, Diff of B...], embedding layer probs
# 73% - something in the prompt that makes the model overconfident
# Feeding in semantically similar prompts into model
# Sentence normalized average semantic entropy over all tokens...???
# One metric: How much performance improvement by ignoring low confidence samples
### TODO: Finish finding corr between acc and entropy. Sensitivity vs entropy. Ability to be quantized (inference speed, important for medical) vs entropy?
## TODO: ADD AUROC for analysis!!! <<--- nvm, just add density plot plz
# In general, analyze relationship between sem entropy and things for medical setting.
# Then, prefix search (randomly), try to find at least one that has a non-negative accuracy delta
# Try to do chain of thought for "digging" based on how uncertain the model is
# How to make LLM better at estimating how confident it is in its answer
# Intuitively, can we actually do better than using the embedding layer? Because the model might just be confidently wrong... if it's unsure, that would be reflected in
# semantic entropy over all perturbations
# semantically similar prompts, how much does the model's confidence change?
# thought experiments?
# try different perturbation methods (levels: character, word, sentence/semantically similar)
# TODO: Find pattern in hallucinated vs non-hallucinated prompts. Try to find why, to decrease confident wrong answers
# TODO: Instead of using edit distance, can we add noise directly to the embedding layer? (semantic noise)
# Look at attention weights, see if there's a pattern in the attention weights that are wrong
# features: semantic entropy, perturbations,
# write down time for experiment
def arr_to_prob_freq(arr):
    """Return ``(value, relative_frequency)`` pairs for *arr*, most frequent first.

    Each distinct element of ``arr`` appears once, paired with the fraction
    of positions it occupies. Elements with equal frequency keep the order
    in which they first occur in ``arr``. An empty ``arr`` gives ``[]``.
    """
    counts = {}
    for item in arr:
        counts[item] = counts.get(item, 0) + 1

    total = len(arr)
    pairs = [(value, count / total) for value, count in counts.items()]
    # A stable descending sort keeps first-seen order among ties.
    pairs.sort(key=lambda pair: pair[1], reverse=True)
    return pairs
# if __name__ == '__main__':
# prompts = ["Question 1 test asdf lmao yeet: ...", "Question 2: ...", "Say the letter G"]
# results, probs = get_next_token(prompts)
# print(results)
# print(probs)
# print([get_entropy_from_probabilities(i) for i in probs])
# Evaluation driver: for each question, query the model on `n_shuffles`
# shuffled prompts, record the entropy of each next-token distribution,
# take a majority vote over the answers, and store per-question statistics.
tot_questions = 1000
# tot_questions = get_data_len()
# n_samples = 2000
print(tot_questions)
res = []
correct_count = 0
n_shuffles = 50
# Maps the answer letter emitted by the model to an index into `shuffled`.
temp = {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7}
with tqdm(total=tot_questions) as pbar:
    for row in range(tot_questions):
        cor_ans = get_correct_answer(row)
        all_responses = []
        all_entropies = []
        for _ in range(n_shuffles):
            cur_prompt, shuffled = get_shuffled_row_query(row)
            cur_prompt = [cur_prompt]
            response, probs = get_next_token_fast(cur_prompt)
            entropy = get_entropy_from_probabilities(probs)
            # First character of the first response is the answer letter;
            # translate it back through `shuffled` so answers from different
            # shuffles are comparable.
            # NOTE(review): presumably `shuffled` holds the original option
            # labels in shuffled order - confirm against question_loader.
            model_ans = response[0][0]
            model_ans = shuffled[temp[model_ans]]
            all_responses.append(model_ans)
            all_entropies.append(entropy)
        print(all_responses)
        # Entropy summed over the trials that produced the correct answer,
        # still divided by the total number of trials. Compared per trial:
        # `all_responses == cor_ans` on a plain list is a single bool, which
        # made the previous np.where() expression always evaluate to 0.
        answer_avg_entropy = sum(
            ent
            for ans, ent in zip(all_responses, all_entropies)
            if ans == cor_ans
        ) / n_shuffles
        avg_entropy = sum(all_entropies) / n_shuffles
        # Majority vote over the shuffles (ties go to the first-seen answer).
        response_freqs = arr_to_prob_freq(all_responses)
        model_ans = response_freqs[0][0]
        is_correct = model_ans == cor_ans
        correct_count += 1 if is_correct else 0
        # Update progress bar with the current percentage of correct answers
        pbar.set_postfix({'Correct %': f'{(correct_count / (row + 1)) * 100:.2f}%'})
        pbar.update(1)
        res.append({
            "row": row,
            "avg_entropy": avg_entropy,
            "answer_avg_entropy": answer_avg_entropy,
            "is_correct": is_correct,
            "model_prob": response_freqs,
            "model_response": model_ans,
        })
# NOTE(review): results are written once after all rows; move this into the
# loop if per-row checkpointing was intended.
dump_data(res, data_outpath)