evaluate_model.py
import random
import pandas as pd
from transformers import BertTokenizer
from transformers import BertForSequenceClassification
from transformers import TrainingArguments, Trainer
from datasets import Dataset
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
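# Fine-tune a pretrained BERT checkpoint (DziriBERT by default) on the
# classification data and average the evaluation metrics over several seeds.
# Uncomment one of the alternative checkpoints below to evaluate another model.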
#model = 'bert-base-multilingual-uncased'
#model = 'aubmindlab/bert-base-arabert'
#model = 'qarib/bert-base-qarib'
#model = 'UBC-NLP/MARBERT'
#model = 'CAMeL-Lab/bert-base-arabic-camelbert-da'
#model = 'CAMeL-Lab/bert-base-arabic-camelbert-mix'
model = 'alger-ia/dziribert'
tokenizer = BertTokenizer.from_pretrained(model)
# seeds to reproduce the results of the paper
seeds = [38, 66, 216, 128, 214, 412, 180, 14, 196, 42]
# code to generate new seeds
#seeds = []
#for i in range(10):
#    seeds.append(random.randint(1, 500))
# the results presented in the paper were obtained on a Tesla K80 GPU
# please check the type of your GPU using the nvidia-smi command
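# model_init callback for the Trainer: a fresh classification head is built for
# every seed. num_labels=3 is for the sentiment task; switch to the commented
# line (num_labels=10) when training on the emotion data.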
def get_model():
    return BertForSequenceClassification.from_pretrained(model, num_labels=3)
    # return BertForSequenceClassification.from_pretrained(model, num_labels=10)
def preprocess_function(examples):
    return tokenizer(examples['text'], truncation=True, max_length=512)
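# Accuracy plus macro-averaged precision, recall and F1 on the evaluation set.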
def compute_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels,
                                                               preds,
                                                               average='macro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }
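# Sentiment data by default; swap in the commented lines for the emotion data.
# 'Unnamed: 0' is the leftover pandas index column saved with the CSVs.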
train_data = pd.read_csv('data/train_sent.csv').drop(['Unnamed: 0'], axis=1)
test_data = pd.read_csv('data/test_sent.csv').drop(['Unnamed: 0'], axis=1)
#train_data = pd.read_csv('data/train_emotion.csv').drop(['Unnamed: 0'], axis=1)
#test_data = pd.read_csv('data/test_emotion.csv').drop(['Unnamed: 0'], axis=1)
train_dataset = Dataset.from_pandas(train_data)
test_dataset = Dataset.from_pandas(test_data)
encoded_train = train_dataset.map(preprocess_function, batched=True)
encoded_test = test_dataset.map(preprocess_function, batched=True)
args = TrainingArguments(
    "sentimentClassification",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy"
)
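# Note: recent transformers versions may require save_strategy="epoch" here so
# that it matches evaluation_strategy when load_best_model_at_end=True.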
accuracy = 0
f1 = 0
precision = 0
recall = 0
i = 0
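# Train and evaluate once per seed, accumulating each metric for the final average.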
for seed in seeds:
    print('seed', i, ':', seed)
    args.seed = seed
    trainer = Trainer(
        model_init=get_model,
        args=args,
        train_dataset=encoded_train,
        eval_dataset=encoded_test,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    trainer.train()
    res = trainer.evaluate()
    accuracy += res['eval_accuracy']
    f1 += res['eval_f1']
    precision += res['eval_precision']
    recall += res['eval_recall']
    i += 1
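# Metrics averaged over all seeds (order: accuracy, F1, precision, recall).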
print(accuracy/len(seeds),
      f1/len(seeds),
      precision/len(seeds),
      recall/len(seeds))