-
Notifications
You must be signed in to change notification settings - Fork 5
/
options.yaml
82 lines (81 loc) · 1.63 KB
/
options.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
standard:
  # These options are applied to all models.

  # Nearest neighbor search is done in batch. A smaller batch will yield
  # better results but is slower.
  batch_size: 8
  # Split hashtags on capital letters.
  hashtag_split: true
  # Turn sparse tf-idf features into dense ones using Singular Value
  # Decomposition.
  svd: false
  # NOTE(review): this option was left undocumented; confirm what it toggles.
  binary: true
  # Use tf-idf weights when averaging Word2Vec vectors.
  tfidf_weights: false
  # Save features in the form of a .npy or .npz (for sparse vectors) file.
  save: true
  # Save results in a .csv file.
  save_results: true
  # NOTE(review): presumably the language code of the processed text; confirm.
  lang: "fr"
  # Dataset file path (a .tsv file).
  dataset: "data/event2018.tsv"
  # NOTE(review): meaning of this value is not documented here; confirm
  # against the code that reads it.
  annotation: "annotated"
  # NOTE(review): undocumented flag; confirm its effect before changing it.
  text+: false
# You may configure model-specific options in the sections below; they override the standard parameters.
tfidf_all_tweets:
  # Model-specific options; these override the `standard` parameters.
  save: false
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.6
    - 0.65
    - 0.7
    - 0.75
    - 0.8
  remove_mentions: true
tfidf_dataset:
  # Model-specific options; these override the `standard` parameters.
  save: false
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.55
    - 0.6
    - 0.65
    - 0.7
    - 0.75
  remove_mentions: true
w2v_gnews_en:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.20
    - 0.25
    - 0.30
    - 0.35
    - 0.40
  remove_mentions: true
sbert_nli_sts:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.3
    - 0.35
    - 0.4
    - 0.45
    - 0.5
  remove_mentions: false
sbert:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.4
    - 0.45
    - 0.50
    - 0.55
    - 0.6
  remove_mentions: false
  # NOTE(review): looks like the name of the pretrained checkpoint to load;
  # confirm how the consuming code resolves it.
  sub_model: "paraphrase-multilingual-MiniLM-L12-v2"
bert:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.02
    - 0.03
    - 0.04
    - 0.05
    - 0.06
  remove_mentions: false
elmo:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.04
    - 0.06
    - 0.08
    - 0.1
    - 0.2
  remove_mentions: false
use:
  # Model-specific options; these override the `standard` parameters.
  # NOTE(review): presumably the candidate threshold values tried for this
  # model; confirm against the consuming code.
  threshold:
    - 0.3
    - 0.4
    - 0.45
    - 0.5
    - 0.55
  remove_mentions: false