-
Notifications
You must be signed in to change notification settings - Fork 2
/
predicting_origin.py
163 lines (123 loc) · 52.5 KB
/
predicting_origin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
import numpy as np
import pandas as pd
import lightgbm as lgb
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import KFold, train_test_split, RandomizedSearchCV
from sklearn.metrics import confusion_matrix
from textwrap import wrap
def k_fold_evaluation(speeches_df, model, n_splits, features, predictable):
    """Cross-validate ``model`` with shuffled K-fold splits and print its precision.

    For every fold the model is re-fitted on the training rows and the
    micro-averaged precision is computed on the held-out rows. The mean and
    standard deviation over all folds are printed; nothing is returned.

    Args:
        speeches_df: pandas DataFrame holding both the feature columns and the
            target column(s); rows are selected positionally with ``.iloc``.
        model: estimator exposing ``fit(X, y)`` and ``predict(X)``. It is
            fitted in place, so after the call it holds the last fold's fit.
        n_splits: number of folds passed to ``KFold``.
        features: column label(s) used as model input.
        predictable: column label(s) of the target to predict.
    """
    # No random_state is given, so the shuffled folds differ between runs.
    kf = KFold(n_splits=n_splits, shuffle=True)
    precision_scores = []
    for train, test in kf.split(speeches_df):
        train_rows = speeches_df.iloc[train]
        test_rows = speeches_df.iloc[test]
        X_train = train_rows[features]
        y_train = train_rows[predictable]
        X_test = test_rows[features]
        expected_y = test_rows[predictable]
        # ravel() flattens the target to the 1-D array that fit() expects.
        model.fit(X_train, y_train.values.ravel())
        # Predict with the estimator that was just fitted on this fold; the
        # previous code referenced an unrelated global name here.
        predicted_y = model.predict(X_test)
        precision_scores.append(metrics.precision_score(expected_y, predicted_y, average='micro'))
    print(r'The average precision score +- std: {} +- {}'.format(np.mean(precision_scores), np.std(precision_scores)))
def model_gridsearch(speeches_df, model, n_splits, gridParams, features, predictable):
    """Search hyper-parameters for ``model`` and return the best combination.

    Despite the name, the search is a ``RandomizedSearchCV`` configured with
    ``n_iter=100``, ``cv=n_splits`` and ``n_jobs=-1``. The winning parameter
    set is printed and returned.

    Args:
        speeches_df: pandas DataFrame containing feature and target columns.
        model: estimator to tune.
        n_splits: value passed as ``cv`` to the search.
        gridParams: parameter distributions/grid handed to the search.
        features: column label(s) used as model input.
        predictable: column label(s) of the target.

    Returns:
        The ``best_params_`` mapping of the fitted search.
    """
    inputs = speeches_df[features]
    # Flatten the target to 1-D before handing it to the search.
    target = speeches_df[predictable].values.ravel()

    search = RandomizedSearchCV(
        estimator=model,
        param_distributions=gridParams,
        n_iter=100,
        cv=n_splits,
        n_jobs=-1,
        verbose=0,
    )
    search.fit(inputs, target)

    best_params = search.best_params_
    print('Best parameters: ', best_params)
    return best_params
def test_set_evaluation(speeches_df_train, speeches_df_test, model_test, features, predictable):
    """Fit ``model_test`` with early stopping, plot feature importances, score the test set.

    The training frame is split 80/20 (fixed seed 42); the model is fitted on
    the larger part while the smaller part and the fitting part are both
    supplied as evaluation sets. The ten highest feature importances are drawn
    as a horizontal bar chart, saved to ``feature_importance.png`` and shown.
    Finally the micro-averaged precision on ``speeches_df_test`` is printed.

    Args:
        speeches_df_train: pandas DataFrame used for fitting and validation.
        speeches_df_test: pandas DataFrame used only for the final score.
        model_test: estimator whose ``fit`` accepts ``eval_set``,
            ``early_stopping_rounds`` and ``verbose`` and which exposes
            ``feature_importances_`` (presumably a LightGBM scikit-learn
            model; confirm against the caller).
        features: column labels used as model input.
        predictable: column label(s) of the target.

    Returns:
        Tuple ``(expected_y, predicted_y)`` for the test set.
    """
    # Hold back a validation part so early stopping can limit overfitting.
    fit_df, validation_df = train_test_split(
        speeches_df_train, test_size=0.2, shuffle=True, random_state=42
    )
    X_fit, y_fit = fit_df[features], fit_df[predictable]
    X_val, y_val = validation_df[features], validation_df[predictable]

    # NOTE(review): newer LightGBM releases (4.x) dropped the
    # early_stopping_rounds/verbose fit kwargs in favour of callbacks -
    # confirm the pinned version before upgrading.
    model_test.fit(
        X_fit,
        y_fit.values.ravel(),
        eval_set=[(X_val, y_val), (X_fit, y_fit)],
        early_stopping_rounds=10,
        verbose=5,
    )

    expected_y = speeches_df_test[predictable]
    predicted_y = model_test.predict(speeches_df_test[features])

    print('Plot metrics during training...')
    # The training-metric plot is currently disabled:
    # ax = lgb.plot_metric(model_test.evals_result_)
    # plt.show()

    # Sorting (importance, name) pairs ascending puts the ten most important
    # features in the last ten rows.
    ranked_pairs = sorted(zip(model_test.feature_importances_, X_fit.columns))
    feature_imp = pd.DataFrame(ranked_pairs, columns=['Value', 'Feature'])
    top_features = feature_imp.iloc[-10:]

    # Break long feature names every 13 characters so the tick labels fit.
    wrapped_labels = []
    for feature_name in top_features['Feature'].tolist():
        wrapped_labels.append('\n'.join(wrap(feature_name, 13)))

    ax1 = top_features.plot.barh(x="Feature", y="Value")
    ax1.get_legend().remove()
    ax1.set_yticklabels(wrapped_labels)
    ax1.set_ylabel('Features', fontsize=20)
    ax1.set_xlabel('Feature importance', fontsize=20)
    for axis in (ax1.xaxis, ax1.yaxis):
        axis.set_tick_params(labelsize=20)
    plt.tight_layout()
    plt.savefig('feature_importance.png', dpi=300)
    plt.show()

    test_precision = metrics.precision_score(expected_y, predicted_y, average='micro')
    print('The precision score on the test set: ', test_precision)
    return (expected_y, predicted_y)
# Load the speeches table from CSV and discard rows whose 'Region Name' is missing.
speeches_df = (
    pd.read_csv('speeches_df_1hotencoded_top10words.csv')
    .dropna(subset=['Region Name'])
)
features_most_used_words = ['legacy', 'propose', 'ongoing', 'provisional', 'remote', 'ally', 'wonderful', 'olympic', 'black', 'covidsafe', 'deepen', 'lebanon', 'attribute', 'data', 'think', 'rethink', 'disturb', 'pakistan', 'churchill', 'office', 'unrepentant', 'universality', 'rwanda', 'produce', 'sarajevo', 'basic', 'tie', 'gandhi', 'ninawa', 'invite', 'canadian', 'colony', 'mexico', 'kabul', 'write', 'unwittingly', 'conclusion', 'prevention', 'deprivation', 'mountain', 'legislation', 'corrode', 'gentleman', 'blame', 'course', 'export', 'tour', 'defence', 'muscle', 'velvet', 'adjustment', 'review', 'acre', 'support', 'tanzanian', 'situation', 'tajik', 'montevideo', 'angolan', 'iteration', 'census', 'ignorance', 'territory', 'men', 'pristina', 'cancer', 'testimony', 'prez', 'uk', 'unable', 'kwassa', 'la', 'win', 'heart', 'compromise', 'candidly', 'snow', 'jos', 'western', 'ekti', 'fully', 'annexation', 'rabbani', 'enviable', 'monetary', 'layer', 'separate', 'site', 'nigerian', 'consensus', 'game', 'dominant', 'airport', 'limitation', 'educational', 'dea', 'insurance', 'storm', 'health', 'beauty', 'solomon', 'party', 'prosperity', 'credibility', 'extremist', 'allah', 'establish', 'transboundary', 'scourge', 'castro', 'interfaith', 'sovereignty', 'constantly', 'paramilitary', 'disgrace', 'nationalize', 'household', 'exceptional', 'ravage', 'compact', 'blair', 'disintegration', 'coin', 'muslim', 'irresponsible', 'indigenous', 'slowdown', 'multidimensional', 'pyeongcheng', 'resolution', 'watercourse', 'autonomous', 'revitalization', 'stabilize', 'aquila', 'finland', 'commissioner', 'tackle', 'violence', 'push', 'overhang', 'lord', 'financing', 'ehud', 'read', 'funding', 'focus', 'passive', 'binacional', 'recipe', 'indonesia', 'urbanize', 'climb', 'allegation', 'activity', 'footstep', 'signatory', 'primary', 'exemplify', 'contradiction', 'worldwide', 'spaak', 'cypriot', 'farmer', 'upcoming', 'minimum', 'immigration', 'henri', 'flux', 'depth', 'atrocity', 'thailand', 
'tool', 'transnational', 'essebsi', 'humanitarian', 'facilitation', 'insurgency', 'holding', 'forcible', 'struggle', 'solidarity', 'hungary', 'sum', 'association', 'inequity', 'darwish', 'proposal', 'increasingly', 'brandt', 'precious', 'reintegration', 'banana', 'hussein', 'fix', 'behaviour', 'supply', 'hectare', 'paint', 'yemeni', 'symbol', 'technically', 'kindred', 'advent', 'radiation', 'start', 'semipalatinsk', 'mt', 'reduction', 'september', 'colleague', 'virus', 'khawarij', 'tokyo', 'unfold', 'matthew', 'alba', 'curtain', 'year', 'condemn', 'prescribe', 'demanding', 'immobility', 'market', 'sint', 'brilliant', 'problem', 'sato', 'mineral', 'mswati', 'disintegrate', 'east', 'uri', 'perfection', 'missile', 'formal', 'brother', 'welfare', 'ebola', 'strip', 'design', 'house', 'emigrant', 'butterfly', 'piracy', 'kazakhstan', 'accident', 'discontent', 'jungle', 'recommendation', 'kurdish', 'kathmandu', 'amend', 'multitude', 'judea', 'wave', 'katowice', 'forgotten', 'direct', 'controversy', 'gomez', 'loss', 'monusco', 'rouhani', 'senegalese', 'ghanaian', 'reliable', 'cynical', 'saudi', 'clash', 'fact', 'strait', 'invented', 'depart', 'unasur', 'land', 'dependable', 'aid', 'norway', 'itu', 'redrawing', 'carefully', 'victory', 'awoonor', 'tambo', 'unamid', 'guatemala', 'parliamentarian', 'premise', 'prisoner', 'section', 'wander', 'cyclone', 'highness', 'class', 'tropical', 'restraint', 'cut', 'chilean', 'founding', 'indonesian', 'bakassi', 'lra', 'algeria', 'lady', 'croatian', 'france', 'cyril', 'banyan', 'benigno', 'sanchez', 'different', 'bread', 'chronic', 'announce', 'oda', 'replenish', 'globally', 'czech', 'flood', 'strange', 'cybercrime', 'houthis', 'preservation', 'prohibit', 'burkina', 'tripoli', 'feminine', 'zeid', 'policy', 'tanzania', 'temperature', 'caliphate', 'chalk', 'leone', 'unrepresentative', 'representation', 'physical', 'da', 'interconnected', 'ould', 'mobility', 'cd', 'play', 'daniel', 'biodiversity', 'bouterse', 'bee', 'mongolia', 'malala', 
'synergy', 'formula', 'piera', 'right', 'arbitrariness', 'conviction', 'grim', 'essence', 'iv', 'bangla', 'digitalization', 'earth', 'issue', 'saarc', 'exporter', 'depends', 'adherence', 'conformity', 'choice', 'tobacco', 'mountainous', 'federation', 'shadow', 'wfp', 'geographical', 'israel', 'spending', 'nargis', 'greece', 'abortion', 'rahmon', 'notorious', 'degradation', 'liberty', 'disorder', 'eurozone', 'malian', 'bless', 'kenyatta', 'double', 'lieutenant', 'disarmament', 'saddam', 'pooling', 'somalia', 'enlargement', 'deep', 'bashar', 'broadband', 'ceasefire', 'propaganda', 'africa', 'covenant', 'urban', 'viet', 'agriculture', 'kilometre', 'prejudice', 'dispatch', 'khama', 'human', 'insularity', 'abkhazia', 'iron', 'idol', 'mexican', 'innocence', 'contagious', 'initiative', 'mozambican', 'hard', 'immerse', 'healthy', 'algiers', 'revise', 'eradication', 'caspian', 'congress', 'bangladesh', 'defender', 'traumatic', 'technology', 'participation', 'nomad', 'rescue', 'invasion', 'invade', 'novel', 'thank', 'neutrality', 'anthropology', 'tell', 'happen', 'son', 'quesada', 'excellent', 'mass', 'ecosystem', 'cameroonian', 'predict', 'audit', 'nz', 'responsibly', 'proof', 'zede', 'elect', 'civilized', 'corn', 'muhammad', 'sibelius', 'globalize', 'speech', 'interaction', 'truth', 'opcw', 'attack', 'fake', 'pithy', 'tranquillity', 'brown', 'wish', 'altered', 'fatigue', 'story', 'ecuador', 'redistribution', 'chad', 'croatia', 'interdependence', 'krone', 'unctad', 'daesh', 'sustainable', 'murder', 'irresponsibility', 'unconditionally', 'great', 'entrepreneurship', 'migrate', 'harmonious', 'ngo', 'safety', 'paralyse', 'urgently', 'morning', 'havana', 'service', 'sport', 'coordinated', 'manila', 'fitting', 'fruit', 'municipality', 'mercenary', 'neighbour', 'sisterly', 'campaign', 'explain', 'bin', 'aylan', 'polarization', 'integration', 'crime', 'favourable', 'amis', 'relation', 'intervene', 'sufficiency', 'various', 'resource', 'camp', 'fit', 'ahead', 'happiness', 
'activation', 'category', 'interlocutor', 'importance', 'failure', 'america', 'obscurantist', 'italy', 'accessibility', 'cdc', 'diversity', 'obeisance', 'cuba', 'shallow', 'tolerance', 'majesty', 'frame', 'bibi', 'resilience', 'uncontrollable', 'hi', 'partner', 'rajoelina', 'infectious', 'fraud', 'tendency', 'commitment', 'indulgence', 'korean', 'code', 'auspex', 'circulation', 'sri', 'balkan', 'kyoto', 'status', 'boost', 'peru', 'diaoyu', 'despot', 'danilo', 'au', 'darfur', 'trauma', 'identify', 'isaiah', 'repression', 'success', 'vital', 'demilitarized', 'timely', 'liar', 'eac', 'feel', 'respect', 'item', 'portugal', 'golden', 'prefer', 'liberate', 'withdraw', 'living', 'machine', 'russian', 'typical', 'prevalence', 'unifil', 'accordance', 'yen', 'priority', 'aggression', 'thriving', 'kedo', 'permit', 'incitement', 'reservoir', 'industrial', 'fraternal', 'islamist', 'triad', 'humanity', 'australian', 'kosova', 'communication', 'bongo', 'german', 'communism', 'mention', 'combination', 'baghdad', 'gradually', 'samoa', 'hipc', 'fortunately', 'lima', 'radicalism', 'incomparable', 'intensify', 'belizean', 'cox', 'devil', 'god', 'archive', 'range', 'rebel', 'candidate', 'belong', 'paramount', 'inclusion', 'conference', 'connection', 'guinean', 'serbian', 'activate', 'tfg', 'aliir', 'extinguish', 'survivor', 'macedonia', 'fraternity', 'taoudenni', 'unrealistic', 'botswana', 'gender', 'europe', 'corruption', 'extremization', 'commendation', 'valid', 'aaa', 'british', 'raw', 'powerful', 'britain', 'stubborn', 'gdp', 'food', 'asian', 'doha', 'gather', 'sister', 'alter', 'cursory', 'martyr', 'chinggis', 'industry', 'denounce', 'oppressed', 'notoriety', 'fuel', 'unity', 'trail', 'prominent', 'populism', 'reform', 'montenegro', 'nordic', 'internally', 'half', 'discover', 'increase', 'triumph', 'submit', 'grateful', 'unfair', 'benefit', 'profitable', 'extraordinary', 'harmony', 'condition', 'insecurity', 'netanyahu', 'small', 'immigrant', 'separation', 'road', 'installation', 
'backwardness', 'mirziyoyev', 'taiwan', 'strongly', 'develop', 'lovro', 'inuit', 'chill', 'hamida', 'refusal', 'disenfranchise', 'tourist', 'goma', 'lag', 'tobago', 'mediation', 'pursuit', 'alcohol', 'socioprofessional', 'lock', 'fee', 'heroin', 'antagonism', 'streak', 'mechanism', 'recommend', 'crucible', 'preamble', 'advance', 'munition', 'paris', 'avail', 'paradigm', 'want', 'affection', 'archipelago', 'face', 'rebuild', 'mutate', 'distortion', 'zapad', 'drive', 'latin', 'oil', 'accession', 'football', 'hitler', 'maoist', 'ben', 'cradle', 'employ', 'gcc', 'sociopolitical', 'zia', 'japanese', 'caput', 'similar', 'moment', 'hamidah', 'pieta', 'corresponding', 'supplement', 'centennial', 'elaboration', 'fluctuation', 'foster', 'clark', 'metohija', 'research', 'brasilia', 'related', 'discord', 'marine', 'sugar', 'electoral', 'gaza', 'career', 'inequality', 'restitution', 'unification', 'prehistory', 'care', 'sword', 'iii', 'digitization', 'jeddah', 'amisom', 'aotearoa', 'civilian', 'myth', 'southern', 'expression', 'violation', 'prophet', 'friend', 'chasm', 'ghani', 'paul', 'factor', 'french', 'archipelagic', 'goodwill', 'belizeans', 'uniosil', 'diagne', 'rouge', 'khanate', 'peninsula', 'upper', 'craze', 'individually', 'consecrate', 'hateful', 'population', 'depicting', 'bystander', 'mediterranean', 'negotiation', 'infrastructure', 'incumbent', 'restoration', 'ghost', 'weapon', 'avenue', 'statesman', 'business', 'unreliable', 'drinking', 'rogue', 'despondent', 'administer', 'rice', 'developing', 'assad', 'igad', 'coalition', 'impede', 'ask', 'combat', 'accountable', 'apple', 'notably', 'number', 'life', 'intend', 'delta', 'convergent', 'venezuelan', 'gpa', 'domestic', 'overcome', 'immensely', 'savannah', 'kenya', 'selflessness', 'target', 'expect', 'pearl', 'socialism', 'spaniard', 'vessel', 'cultivate', 'tiraspol', 'northeast', 'redeployment', 'warning', 'allow', 'diamond', 'commission', 'observation', 'condolence', 'istanbul', 'montenegrin', 'preside', 
'revision', 'decarbonization', 'settlement', 'emirates', 'himalaya', 'vulnerability', 'projection', 'bank', 'instability', 'central', 'operation', 'excellency', 'religion', 'dispensary', 'harassment', 'die', 'centrifuge', 'glacier', 'chain', 'capitalism', 'moro', 'tokelau', 'cup', 'rectangular', 'traditional', 'specie', 'colorado', 'libration', 'buy', 'umma', 'citizen', 'realia', 'discussion', 'format', 'available', 'grand', 'sunni', 'trial', 'accuse', 'cameroon', 'dismantle', 'arduous', 'expenditure', 'reintegrate', 'contact', 'downstream', 'aim', 'belgium', 'albania', 'true', 'unstable', 'custom', 'encouraging', 'biosphere', 'monuc', 'chair', 'casualty', 'sinhala', 'interference', 'step', 'hiroshima', 'bahadur', 'widen', 'really', 'chris', 'objective', 'prevail', 'fear', 'deterrence', 'quiet', 'impunity', 'mutilate', 'ilo', 'caudillismo', 'super', 'zone', 'fail', 'unlimited', 'congenial', 'modern', 'biased', 'new', 'yasuni', 'enclave', 'confidence', 'itno', 'lead', 'oman', 'buenos', 'monarch', 'belarus', 'war', 'return', 'icj', 'coastal', 'strengthen', 'compatriot', 'brazilian', 'yavuz', 'mission', 'continental', 'donation', 'logic', 'red', 'atmospheric', 'quarrel', 'egypt', 'know', 'admit', 'judiciary', 'mistrust', 'sixtieth', 'embark', 'gurirab', 'request', 'annapolis', 'adviser', 'prison', 'retain', 'presumption', 'tree', 'stable', 'vein', 'concerted', 'warming', 'rica', 'superficial', 'heaven', 'viable', 'concrete', 'holistic', 'houthi', 'abu', 'capitalist', 'icty', 'energetically', 'intelligence', 'credible', 'mayardit', 'isolated', 'oppressive', 'veto', 'reunify', 'catastrophe', 'worker', 'brain', 'commodity', 'peaceful', 'competence', 'societal', 'deteriorate', 'principle', 'allay', 'locate', 'context', 'documentation', 'chapter', 'mitigate', 'gambian', 'offer', 'equality', 'basotho', 'ctc', 'agency', 'film', 'kita', 'nagorny', 'kyrgyzstan', 'legitimize', 'forget', 'crop', 'tatar', 'jamaica', 'poach', 'aprm', 'kyiv', 'tragic', 'product', 'importantly', 
'indisputable', 'persian', 'abundant', 'appreciate', 'arguably', 'bazar', 'relate', 'denis', 'founder', 'appreciated', 'wonder', 'xv', 'amazon', 'soul', 'reason', 'jewish', 'expertise', 'minority', 'management', 'host', 'ready', 'bolaos', 'ldcs', 'malawi', 'economy', 'bouteflika', 'cattle', 'shavkat', 'guide', 'eighteen', 'execution', 'employment', 'apply', 'paralympic', 'satisfy', 'shock', 'lankan', 'reserve', 'bush', 'sep', 'fiji', 'dis', 'filipe', 'efficient', 'electricity', 'wisdom', 'destination', 'petroleum', 'negate', 'nouakchott', 'longer', 'nilufa', 'ict', 'protect', 'significant', 'deed', 'response', 'spell', 'contribution', 'migrant', 'action', 'agree', 'grain', 'elite', 'deadly', 'persistence', 'asean', 'relevance', 'leader', 'ghana', 'millennial', 'universe', 'ldc', 'unveiling', 'restriction', 'ministry', 'compel', 'invested', 'excitement', 'zelaya', 'define', 'imprison', 'multipolarism', 'youthful', 'middle', 'chingiz', 'quota', 'demining', 'ifas', 'spark', 'precedent', 'explosion', 'cyprus', 'nature', 'recovery', 'negotiated', 'ignore', 'peer', 'breast', 'consequence', 'liver', 'concept', 'beneficial', 'actor', 'kenyan', 'sincerity', 'green', 'card', 'reunion', 'gnh', 'chemical', 'memorial', 'dcfta', 'togo', 'philippine', 'david', 'draw', 'element', 'balanced', 'coming', 'understanding', 'micron', 'north', 'transmission', 'paragraph', 'counteract', 'arbitration', 'adequate', 'sexually', 'survival', 'delight', 'fulfilment', 'day', 'fight', 'halt', 'itaipu', 'agricultural', 'corporate', 'libya', 'colour', 'accept', 'duterte', 'yasun', 'resonate', 'emission', 'multilateralism', 'friday', 'national', 'enable', 'clout', 'guise', 'plan', 'scheme', 'desperation', 'aboriginal', 'lithuania', 'fingerprint', 'asymmetric', 'project', 'medina', 'pool', 'alien', 'cartel', 'division', 'flora', 'entire', 'canal', 'cetera', 'ingenuity', 'rico', 'weaken', 'rapporteur', 'senseless', 'inequitable', 'multilaterally', 'egyptian', 'baabda', 'critical', 'ottawa', 'pdr', 
'uzbekistan', 'inconvenient', 'level', 'personality', 'regionalism', 'sunday', 'faure', 'zlarin', 'defend', 'subsidy', 'repeal', 'bolvar', 'para', 'endgame', 'effort', 'effectiveness', 'defuse', 'uphold', 'sana', 'fruitful', 'warmly', 'esteem', 'leading', 'maritime', 'torture', 'tower', 'austria', 'attempt', 'democratization', 'conscription', 'bali', 'doumeira', 'room', 'plastic', 'caldern', 'afghanistan', 'recent', 'ubuntu', 'profit', 'measurement', 'accomplishment', 'troop', 'kashmir', 'cereal', 'option', 'attache', 'franois', 'likely', 'acknowledge', 'soft', 'acceptable', 'hop', 'forebear', 'complementary', 'building', 'interim', 'firearm', 'turkish', 'copper', 'hassan', 'imperial', 'malnutrition', 'assembly', 'unsustainable', 'money', 'declaration', 'icelandic', 'dear', 'coexistence', 'multicultural', 'cotton', 'stimulate', 'winter', 'miloevic', 'suivi', 'designate', 'fourth', 'hold', 'proud', 'racism', 'murderer', 'icu', 'spread', 'consumption', 'remnant', 'tohoku', 'transgression', 'demonstrator', 'geopolitical', 'museum', 'owe', 'duque', 'let', 'milan', 'article', 'quality', 'costa', 'judgment', 'unite', 'abject', 'mbeki', 'victoria', 'arab', 'avoidable', 'creation', 'almighty', 'scrap', 'remedy', 'instraw', 'demonstrate', 'abroad', 'childcare', 'partial', 'sand', 'warn', 'kremlin', 'constitution', 'organisation', 'plo', 'adolescent', 'reasonable', 'mejlis', 'prime', 'evacuate', 'nepali', 'troika', 'unmil', 'bicommunal', 'environmental', 'incomprehensible', 'maybe', 'benghazi', 'unilateral', 'coercive', 'injure', 'rakhine', 'soil', 'busan', 'settle', 'unoci', 'celac', 'public', 'consolidate', 'captain', 'kuwaiti', 'raphael', 'civil', 'teresa', 'disenfranchised', 'lakes', 'hate', 'ltte', 'practical', 'expense', 'protectionist', 'guaran', 'linkage', 'federated', 'group', 'remittance', 'promulgate', 'dominican', 'information', 'srebrenica', 'canada', 'mkapa', 'long', 'depression', 'happy', 'imam', 'modernization', 'environment', 'globalization', 'surplus', 
'endless', 'caucasian', 'prior', 'book', 'custodian', 'cultivation', 'hajj', 'cell', 'readiness', 'react', 'ouattara', 'deputy', 'stability', 'yemen', 'grenade', 'devastation', 'impartiality', 'amir', 'minsk', 'argentinian', 'kasai', 'san', 'napkin', 'ondimba', 'london', 'exclusive', 'regional', 'tough', 'reef', 'burden', 'mobilization', 'defeat', 'vastly', 'haiyan', 'barrow', 'charter', 'large', 'exchange', 'operational', 'meet', 'moroccan', 'unfinished', 'affect', 'peacekeeper', 'particularly', 'continue', 'merchant', 'dick', 'honor', 'stipulated', 'steadily', 'malaria', 'real', 'wait', 'caretaker', 'abyan', 'spite', 'conception', 'boldness', 'guatemalan', 'freedom', 'parliamentary', 'darya', 'twist', 'harness', 'mahgreb', 'lankans', 'milf', 'shebaa', 'collectively', 'equitable', 'cheap', 'cleansing', 'price', 'optional', 'respirator', 'surakiart', 'abandon', 'machinery', 'beef', 'armenian', 'process', 'endemic', 'mislead', 'illusion', 'piece', 'social', 'man', 'dam', 'sized', 'soviet', 'bhutan', 'despotism', 'native', 'goya', 'imperialist', 'colonial', 'julian', 'damnation', 'unacceptable', 'pandemic', 'italian', 'improve', 'blackmail', 'calorie', 'forward', 'package', 'megawatt', 'remember', 'rail', 'corfu', 'herald', 'child', 'malagasy', 'order', 'ratify', 'emotional', 'gabon', 'nistru', 'intrinsically', 'commemoration', 'charity', 'clause', 'spill', 'cyberspace', 'mosquito', 'stevens', 'underdevelopment', 'arm', 'budget', 'quo', 'kabor', 'transparency', 'royal', 'scholar', 'fish', 'question', 'eiti', 'bet', 'phenomenon', 'mara', 'miss', 'compass', 'young', 'kazakh', 'transformation', 'mandela', 'enlightened', 'fras', 'interreligious', 'authoritarian', 'lie', 'plus', 'politician', 'outbreak', 'donor', 'maldonado', 'drug', 'decolonization', 'neighbourly', 'tps', 'build', 'bulgarian', 'recognize', 'basin', 'reality', 'astana', 'volunteer', 'eu', 'pregnant', 'field', 'adopt', 'perception', 'brotherhood', 'participates', 'goliath', 'ban', 'esh', 'content', 
'categorically', 'essential', 'labour', 'teacher', 'mauritania', 'recession', 'articulate', 'madiba', 'liveable', 'line', 'soccer', 'reconcile', 'hashemite', 'fighter', 'macedonian', 'seafarer', 'alassane', 'insoluble', 'news', 'drama', 'burma', 'brockmann', 'lasting', 'structure', 'tumor', 'administrative', 'zimbabwean', 'turkey', 'realization', 'spanish', 'regulation', 'referendum', 'sell', 'imposition', 'video', 'cop', 'achievement', 'come', 'anniversary', 'humbly', 'maghreb', 'trump', 'control', 'july', 'commercial', 'vienna', 'basis', 'consider', 'celebration', 'inflation', 'strategy', 'imperative', 'aspiration', 'hammarskjld', 'symposium', 'online', 'minustah', 'sovereign', 'boko', 'dishearten', 'cwc', 'penalty', 'adoption', 'monitoring', 'continent', 'city', 'coca', 'obstacle', 'delivering', 'grip', 'contract', 'azerbaijani', 'toumani', 'study', 'varosha', 'polio', 'uae', 'approach', 'oecd', 'socioeconomic', 'hub', 'informal', 'treaty', 'bizonal', 'geography', 'terrorism', 'sea', 'northern', 'epidemic', 'loved', 'island', 'interconnect', 'persecution', 'sustainability', 'headquarters', 'corporation', 'intercommunal', 'peshmerga', 'bishkek', 'timorese', 'setting', 'understand', 'colombia', 'paramos', 'swedish', 'vulture', 'rid', 'duty', 'head', 'pacific', 'harder', 'enormous', 'pluralist', 'mansour', 'reverse', 'machination', 'trio', 'roosevelt', 'mortality', 'peace', 'peasant', 'advice', 'telecommunication', 'cooperative', 'inhabit', 'shanghai', 'palm', 'ivoire', 'settler', 'durban', 'asem', 'khmer', 'baggage', 'impossible', 'stop', 'doctor', 'china', 'appropriate', 'thai', 'launch', 'reparation', 'shia', 'special', 'moderation', 'girl', 'buddhist', 'coherent', 'resignation', 'incident', 'rahman', 'jack', 'armenia', 'untie', 'route', 'kuwait', 'conservation', 'value', 'contrast', 'displace', 'compaor', 'case', 'precondition', 'thwart', 'centre', 'inclusive', 'inexplicable', 'egoism', 'forcibly', 'echeique', 'early', 'meaning', 'heartfelt', 'miner', 'elecam', 
'micro', 'player', 'trade', 'trading', 'spy', 'definitive', 'transition', 'exploitation', 'martelly', 'male', 'provincial', 'shariah', 'guardian', 'crunch', 'sanctuary', 'hague', 'commonwealth', 'transportation', 'decide', 'decline', 'purchase', 'stay', 'ossetia', 'mubarak', 'fragile', 'proposes', 'isil', 'academy', 'boziz', 'estonia', 'diagnosis', 'desertification', 'fragmentation', 'systemic', 'jarabulus', 'jurisdiction', 'millennium', 'meeting', 'standard', 'expand', 'reconciliation', 'useful', 'unified', 'ukrainian', 'necessarily', 'ivory', 'ypres', 'default', 'intolerance', 'voter', 'unless', 'restructuring', 'voting', 'surely', 'prosecute', 'displacement', 'mprp', 'law', 'mining', 'way', 'remoteness', 'governmental', 'kurdistan', 'investment', 'philosophy', 'abolition', 'demon', 'coordination', 'russia', 'indisputably', 'porfirio', 'remind', 'university', 'ranking', 'mayor', 'parameter', 'eurasian', 'amu', 'disease', 'rom', 'cyber', 'colonize', 'revenue', 'record', 'tribe', 'militant', 'mix', 'sids', 'opportunist', 'displaced', 'challenge', 'eastern', 'contribute', 'distrust', 'express', 'islamic', 'member', 'decisively', 'hadhari', 'mindfulness', 'aire', 'inform', 'mauritian', 'genocide', 'length', 'cicig', 'childbirth', 'haitian', 'example', 'good', 'voice', 'malaysia', 'injustice', 'inertia', 'outlaw', 'lukashenko', 'pariah', 'born', 'strong', 'debt', 'task', 'additionally', 'setback', 'arrow', 'wto', 'reconstruction', 'appreciation', 'polish', 'unexploded', 'impose', 'pretzel', 'vice', 'infect', 'kordofan', 'caste', 'short', 'connected', 'document', 'caribbean', 'say', 'shame', 'ahimsa', 'abuse', 'convey', 'programme', 'fascism', 'exciting', 'civic', 'caricom', 'live', 'iranian', 'arakan', 'job', 'juba', 'character', 'determined', 'malvinas', 'terrible', 'replace', 'prosecutor', 'welcome', 'actual', 'pledge', 'unheeded', 'outflow', 'tribunal', 'tamil', 'protracted', 'chairmanship', 'cyberwarfare', 'difficult', 'perseverance', 'bull', 'authenticity', 
'liberalization', 'unmik', 'refoulement', 'incoming', 'christian', 'activist', 'mobilize', 'nieto', 'nepalese', 'methodius', 'cessation', 'christianity', 'egregious', 'contest', 'reciprocal', 'mankind', 'secondly', 'statecraft', 'nguesso', 'strategic', 'discrimination', 'illegal', 'fao', 'rhodes', 'divided', 'gigantic', 'definition', 'glimmer', 'current', 'comit', 'yoga', 'enriched', 'note', 'juncture', 'disorientation', 'culmination', 'benazir', 'peruvian', 'renovation', 'nazism', 'lofty', 'lao', 'sector', 'frozen', 'promote', 'measure', 'tunisian', 'specifically', 'cost', 'sorry', 'paradox', 'itf', 'regard', 'linger', 'grandparent', 'barbarity', 'inhabitant', 'bus', 'idea', 'excessive', 'jammu', 'skill', 'peacekeeping', 'dense', 'lifestyle', 'stabilization', 'condemns', 'highly', 'protocol', 'search', 'generation', 'mercosur', 'free', 'horrible', 'pipeline', 'suggestion', 'dig', 'prespa', 'illicit', 'speak', 'lengthy', 'descendant', 'preferential', 'dhaka', 'lebanese', 'error', 'belt', 'vietnamese', 'namibia', 'representative', 'chile', 'boy', 'oic', 'albanian', 'djibouti', 'athens', 'deliver', 'ocean', 'dream', 'thant', 'disputable', 'bokova', 'reporting', 'internet', 'desert', 'unmiss', 'restrict', 'saint', 'hiv', 'forbid', 'aquino', 'outfit', 'tragedy', 'nsedp', 'dialogue', 'sensitivity', 'khaleda', 'encouragement', 'construction', 'thousand', 'syria', 'consent', 'chernobyl', 'digital', 'obama', 'intervention', 'birthday', 'arei', 'bag', 'precarious', 'century', 'guard', 'coat', 'transdniestrian', 'stereotype', 'ashgabat', 'print', 'personal', 'norm', 'kingdom', 'restore', 'disruption', 'adopted', 'christchurch', 'bakiev', 'cambodia', 'faso', 'emphasize', 'underdeveloped', 'lusaka', 'khalifa', 'verge', 'monopolize', 'method', 'month', 'subject', 'bomb', 'ordnance', 'forensic', 'industrious', 'solution', 'cent', 'unkind', 'fulfilled', 'abbas', 'rainforest', 'perpetrator', 'banker', 'essozimna', 'niamey', 'medicine', 'department', 'fluid', 'score', 'et', 'wife', 
'firmly', 'isolationist', 'tariff', 'platform', 'withdrawal', 'congressional', 'colom', 'kermadec', 'geneva', 'view', 'michael', 'worthy', 'drain', 'lion', 'columbus', 'undermine', 'resettlement', 'georgia', 'nato', 'unrest', 'primarily', 'inflexible', 'extensive', 'handout', 'multiplicity', 'diplomacy', 'reach', 'riva', 'disagree', 'dunant', 'adriatic', 'franco', 'obrador', 'adverse', 'arsa', 'protection', 'untimely', 'matter', 'sailor', 'page', 'duplication', 'zambia', 'municipal', 'el', 'barbarism', 'antananarivo', 'humid', 'presidency', 'coverage', 'phone', 'latvian', 'concentrate', 'humankind', 'isa', 'nurse', 'popular', 'joint', 'donorship', 'visit', 'delivery', 'ctbt', 'military', 'journal', 'transnistrian', 'synonymous', 'framework', 'dictatorship', 'ethiopia', 'aitmatov', 'irreparable', 'tourism', 'district', 'scientific', 'communal', 'swaziland', 'accountability', 'mujibur', 'mogadishu', 'infant', 'journey', 'abdel', 'criminality', 'nuclear', 'minusca', 'slow', 'directive', 'elephant', 'earn', 'mutilation', 'biological', 'warplane', 'key', 'nicaragua', 'induce', 'active', 'communiqu', 'underlie', 'mrisho', 'possibly', 'hotspot', 'wellbeing', 'prelude', 'syr', 'loser', 'procedure', 'divide', 'summit', 'observer', 'inappropriate', 'malaise', 'evolve', 'dna', 'dayton', 'jerusalem', 'mood', 'landmines', 'solar', 'arctic', 'forced', 'pound', 'vicious', 'beacon', 'afghan', 'assassination', 'succeed', 'holodomor', 'prerequisite', 'reference', 'incompetence', 'urbanization', 'include', 'tashkent', 'issa', 'shape', 'wednesday', 'generosity', 'barrel', 'rosales', 'foreign', 'resolve', 'minusma', 'sanction', 'like', 'bissau', 'sexual', 'pluralism', 'sultanate', 'greenland', 'tradition', 'counterfeit', 'undivided', 'sphere', 'evasion', 'retaliate', 'subregion', 'center', 'bolivian', 'disability', 'newater', 'bottleneck', 'delegation', 'output', 'federative', 'rajiv', 'consolidation', 'hamad', 'initiate', 'upr', 'criticism', 'brazzaville', 'partnership', 'rallying', 
'austerity', 'qualify', 'actively', 'opinion', 'idp', 'future', 'mistake', 'panamanian', 'lldcs', 'auschwitz', 'talk', 'substantial', 'embassy', 'industrialized', 'phrase', 'frequently', 'toothless', 'holy', 'company', 'assimilate', 'choose', 'ton', 'onus', 'parallel', 'monotheism', 'uganda', 'aziz', 'death', 'yesterday', 'cplp', 'slaughter', 'electronic', 'linguistic', 'winner', 'dutch', 'seabed', 'strengthening', 'certainly', 'yugoslav', 'connotation', 'forest', 'patient', 'able', 'climate', 'ollanta', 'brave', 'youth', 'entrepreneur', 'observes', 'counter', 'gain', 'trafficker', 'dead', 'mekong', 'marrakesh', 'monitor', 'test', 'zabala', 'knowledge', 'nagorno', 'hydropower', 'exists', 'leaf', 'assumption', 'japan', 'average', 'enrique', 'term', 'crimean', 'famous', 'pressure', 'uruguayan', 'logo', 'transit', 'shot', 'society', 'kivu', 'equal', 'promotion', 'ahtisaari', 'kill', 'make', 'extremism', 'speculation', 'everyday', 'ansar', 'romania', 'institution', 'evil', 'bangkok', 'pact', 'ill', 'unilateralist', 'tunisia', 'zealand', 'american', 'salvadoran', 'stress', 'delano', 'correction', 'feature', 'dmz', 'belief', 'look', 'specific', 'ncds', 'major', 'constitutional', 'bridge', 'citizenry', 'vigorously', 'location', 'fratricidal', 'nigeria', 'schedule', 'deliberation', 'brussels', 'attitude', 'katoa', 'verify', 'peacebuilding', 'argentine', 'intensive', 'creditor', 'past', 'mayan', 'barbershop', 'convening', 'mudra', 'lesotho', 'chaos', 'magufuli', 'dissociate', 'replete', 'liquidity', 'petersburg', 'judge', 'loud', 'exclusion', 'delegate', 'silence', 'transport', 'interested', 'venezuela', 'body', 'potato', 'immoral', 'cherish', 'chadian', 'little', 'chancellor', 'slovak', 'islam', 'yojana', 'sahel', 'willing', 'walk', 'extraterritorial', 'immunizable', 'poor', 'bulgaria', 'responsive', 'taiz', 'security', 'final', 'mental', 'nightmare', 'wall', 'maize', 'kim', 'rajapaksa', 'liberia', 'poaching', 'unavoidable', 'guinea', 'begin', 'proximity', 'portuguese', 
'tyrant', 'anarchy', 'plane', 'flight', 'ecuadorian', 'apec', 'vocation', 'window', 'spotlight', 'composition', 'collective', 'rudaki', 'eulex', 'kinshasa', 'wake', 'rivalry', 'taxpayer', 'descent', 'trend', 'bullet', 'convergence', 'enforcer', 'opportunity', 'onslaught', 'convince', 'disparity', 'nigerien', 'affluence', 'village', 'arabic', 'opposition', 'legality', 'gayoom', 'backwards', 'wmd', 'israeli', 'imagine', 'mp', 'programming', 'tolerant', 'swap', 'icelander', 'link', 'resurgence', 'strike', 'cancel', 'equilibrium', 'premier', 'team', 'andsf', 'unipolar', 'tiny', 'strife', 'civilization', 'modernity', 'lever', 'fearless', 'emphatic', 'constructive', 'clear', 'decision', 'ghassan', 'previously', 'thirdly', 'solve', 'appeal', 'pittsburgh', 'external', 'neutral', 'seizure', 'bad', 'connect', 'falsely', 'yerevan', 'worry', 'depot', 'gratify', 'haftar', 'bosnia', 'african', 'hormuz', 'slovenia', 'tank', 'education', 'preventive', 'compensate', 'convention', 'stove', 'occupation', 'gibraltar', 'president', 'cancn', 'prescription', 'pea', 'madagascar', 'bode', 'ozone', 'evaluate', 'att', 'idriss', 'famed', 'obsolete', 'vientiane', 'kosovo', 'rebellious', 'iraqi', 'extreme', 'democratize', 'smuggler', 'threat', 'capable', 'figure', 'liberian', 'salam', 'rate', 'galapagos', 'likewise', 'sahara', 'run', 'geological', 'diligent', 'habit', 'compliance', 'riyadh', 'crack', 'observance', 'zakat', 'interdependency', 'hugo', 'word', 'aden', 'chvez', 'reject', 'abraham', 'represent', 'compassion', 'massive', 'fossil', 'ddrr', 'medya', 'abbasi', 'romanian', 'farc', 'chagos', 'guarani', 'cervantes', 'smuggle', 'jirga', 'kigali', 'fdlr', 'bloc', 'heinous', 'explosive', 'zealander', 'sendai', 'zardari', 'advanced', 'odds', 'togolese', 'olympics', 'plunge', 'grow', 'police', 'conscience', 'aruba', 'exercise', 'nile', 'ricans', 'obstructive', 'attractive', 'slave', 'ulaanbaatar', 'liberation', 'evenly', 'neoliberal', 'especially', 'famagusta', 'imo', 'aggressor', 'demand', 
'irrigation', 'multiple', 'berlin', 'libyan', 'smoker', 'light', 'khartoum', 'milk', 'eritrea', 'sierra', 'trust', 'rush', 'racist', 'result', 'mohammed', 'breakthrough', 'mayotte', 'investigation', 'hezbollah', 'remiss', 'mural', 'authentic', 'revolution', 'bangabandhu', 'instead', 'fauna', 'deby', 'concerned', 'intellectual', 'mano', 'inclined', 'autism', 'undof', 'remark', 'alejandro', 'lemkin', 'age', 'argentina', 'digest', 'educate', 'gulf', 'sadc', 'fairly', 'tskhinvali', 'institutionalize', 'vii', 'position', 'frank', 'zionist', 'nationwide', 'significance', 'abstraction', 'tbilisi', 'klaus', 'fell', 'rest', 'cybersecurity', 'unit', 'rule', 'harmonize', 'safe', 'dare', 'progress', 'pollution', 'mahmoud', 'independence', 'maduro', 'uruguay', 'gold', 'formidable', 'resident', 'history', 'stock', 'preparation', 'morazn', 'asan', 'dispute', 'anger', 'pluralistic', 'dollar', 'dynamic', 'sense', 'station', 'justice', 'earthquake', 'racial', 'miguel', 'west', 'bamian', 'sellu', 'doubt', 'kashmiri', 'claim', 'assange', 'medium', 'port', 'govern', 'ought', 'thirst', 'dag', 'irish', 'minurcat', 'demographic', 'unheard', 'fought', 'residual', 'industrialization', 'republic', 'jubilee', 'accelerated', 'encroach', 'kidal', 'fever', 'poisonous', 'pirate', 'inconsistency', 'role', 'bangalee', 'artsakh', 'ticad', 'distant', 'inclusiveness', 'honduras', 'comprehensive', 'transitional', 'require', 'enniskillen', 'alliance', 'malta', 'form', 'unlock', 'nutrition', 'modi', 'aral', 'mali', 'darkness', 'end', 'greek', 'learning', 'ethiopian', 'cfe', 'zero', 'advocate', 'topic', 'classification', 'determination', 'unep', 'spring', 'medical', 'proactive', 'francophonie', 'homage', 'salman', 'rajaonarimampianina', 'enjoy', 'exist', 'trinidad', 'sandinista', 'coincide', 'simple', 'mosque', 'certain', 'plant', 'transfer', 'accelerate', 'deal', 'underscore', 'responsively', 'noble', 'palestinian', 'shelf', 'manifesto', 'finally', 'filipino', 'metre', 'merit', 'cte', 'irma', 
'geothermal', 'fighting', 'vi', 'overarch', 'protest', 'sofia', 'passion', 'remain', 'slavery', 'painful', 'timetable', 'suit', 'rafael', 'devotion', 'interception', 'els', 'multilateralists', 'workshop', 'expo', 'haemorrhage', 'hatred', 'seek', 'robust', 'interpol', 'logging', 'reassure', 'faithfully', 'celebrate', 'kiir', 'probably', 'radicalization', 'south', 'jcpoa', 'emirate', 'lanka', 'diplomatic', 'biya', 'fuse', 'participate', 'billion', 'maarten', 'rich', 'ntc', 'particular', 'alright', 'finished', 'bahrain', 'eswatini', 'dahalo', 'distribution', 'iaea', 'coast', 'complete', 'hurricane', 'rain', 'velasquez', 'transborder', 'proliferation', 'today', 'lisbon', 'main', 'iran', 'succession', 'attachment', 'mainstreamed', 'conflict', 'unpredictable', 'marriage', 'fate', 'possibility', 'intercultural', 'late', 'unesco', 'biology', 'cousin', 'germany', 'proper', 'testing', 'session', 'buddha', 'empire', 'conserve', 'bosnian', 'enhanced', 'thabo', 'neighbor', 'scale', 'zambian', 'couple', 'crown', 'cede', 'personally', 'occupy', 'diversification', 'signing', 'furthermore', 'indivisibility', 'saemaul', 'flag', 'abduction', 'uncertainty', 'meltdown', 'willingness', 'developed', 'countryman', 'wound', 'privilege', 'cooperate', 'preparatory', 'intention', 'liveability', 'ortega', 'universal', 'college', 'tasso', 'abdelaziz', 'complicate', 'lopez', 'purity', 'fissile', 'menace', 'congo', 'science', 'brighter', 'osce', 'panama', 'kikwete', 'theft', 'greatly', 'cold', 'democratic', 'chance', 'integral', 'date', 'zanzibar', 'faith', 'theatre', 'damage', 'occupied', 'message', 'fifteenth', 'correa', 'nepad', 'dawn', 'high', 'monster', 'stakeholder', 'levant', 'invest', 'hear', 'playing', 'mark', 'suffering', 'irregular', 'technological', 'approve', 'force', 'connectivity', 'debate', 'detriment', 'mean', 'exceptionally', 'comoros', 'yusuf', 'shore', 'singapore', 'undertaken', 'draft', 'genuinely', 'empowerment', 'mladic', 'correspond', 'assistance', 'sheikh', 'marawi', 
'instrument', 'exactly', 'complementarity', 'polluter', 'sudan', 'puerto', 'exile', 'indivisible', 'competitiveness', 'stubbornly', 'sect', 'moderate', 'aggressive', 'spain', 'fundamental', 'separatist', 'slovakia', 'civilizational', 'pregnancy', 'omer', 'unlawful', 'espionage', 'helen', 'park', 'warehouse', 'legal', 'nicolas', 'dioxide', 'downplay', 'overwhelm', 'rank', 'koutou', 'aeneas', 'seoul', 'responsibility', 'smokescreen', 'pursue', 'vaccination', 'feminist', 'sort', 'producer', 'membership', 'engine', 'battlefield', 'street', 'vote', 'cane', 'chinese', 'york', 'vibrant', 'victim', 'ukraine', 'gang', 'india', 'golan', 'equity', 'heightened', 'summer', 'difference', 'image', 'astronomical', 'excess', 'governor', 'healthful', 'tuberculosis', 'forever', 'disaster', 'easily', 'mind', 'discourse', 'anc', 'schuman', 'mate', 'rapid', 'drone', 'unama', 'woman', 'oxygen', 'moldova', 'danish', 'opec', 'argentines', 'mediator', 'betray', 'pas', 'bear', 'justified', 'historic', 'concessionary', 'wasa', 'payment', 'computer', 'francisco', 'freshwater', 'ivorian', 'smoke', 'inadequate', 'consular', 'frankly', 'inclusivity', 'remarkable', 'heating', 'peril', 'official', 'possible', 'mahinda', 'deforestation', 'gate', 'fatf', 'emergency', 'voluntarily', 'pida', 'vaccine', 'karimov', 'violent', 'reduce', 'imf', 'belgrade', 'agreement', 'cornerstone', 'spouse', 'nicols', 'election', 'wild', 'affiliate', 'incomplete', 'forge', 'tend', 'share', 'undertake', 'electrical', 'infallible', 'molina', 'malawian', 'eliminate', 'megatrends', 'commend', 'itmo', 'muster', 'serb', 'sedition', 'terror', 'underlying', 'georgian', 'shaheed', 'functional', 'strength', 'wastewater', 'culture', 'malaysian', 'ramsi', 'gap', 'successfully', 'gambia', 'progressively', 'cubic', 'desir', 'burundi', 'paraguayan', 'formalize', 'registration', 'command', 'refer', 'gas', 'chevron', 'patience', 'attach', 'experience', 'recall', 'zimbabwe', 'reflection', 'ethical', 'guilty', 'mdg', 'impediment', 'benin', 
'encroaching', 'european', 'sweden', 'borrow', 'commit', 'domain', 'palestine', 'accord', 'emerge', 'koran', 'slavic', 'aristotle', 'affected', 'terrorist', 'liberal', 'homs', 'arabia', 'donbas', 'hydrogen', 'nyusi', 'lgbtqi', 'handle', 'testament', 'horn', 'tail', 'bolivarian', 'wrongdoing', 'architecture', 'nepal', 'immense', 'subway', 'carthage', 'hand', 'competitive', 'cervical', 'idai', 'ballistic', 'denmark', 'determine', 'inviolability', 'unclos', 'counsellor', 'commander', 'socialist', 'outsider', 'tailing', 'sociological', 'kurmanbek', 'secure', 'protectionism', 'odihr', 'ra', 'brazil', 'ireland', 'million', 'confusion', 'rocket', 'clinic', 'animal', 'imbalance', 'aside', 'legalize', 'stimulation', 'ministerial', 'lawlessness', 'work', 'realistic', 'region', 'misinformation', 'arsenal', 'korea', 'cambodian', 'disinformation', 'agenda', 'organize', 'intimately', 'thein', 'targeted', 'sacrifice', 'haj', 'personnel', 'property', 'halve', 'stalled', 'prestigious', 'checkpoint', 'trafficking', 'rampart', 'salva', 'unu', 'oppose', 'guyana', 'azeri', 'switzerland', 'motif', 'finnish', 'cultural', 'disrespect', 'hall', 'baltic', 'populated', 'kyrgyz', 'comrade', 'teamwork', 'naturally', 'criterion', 'exacerbation', 'nam', 'caucasus', 'xi', 'barrier', 'honduran', 'urgent', 'notion', 'zagreb', 'begum', 'optimism', 'felipe', 'hybrid', 'ago', 'mirziyoev', 'iceland', 'prosper', 'vitally', 'renounce', 'excel', 'risk', 'foreigner', 'courier', 'nowadays', 'mainstream', 'robot', 'recast', 'water', 'irrelevant', 'cooperation', 'gl', 'sure', 'jordanian', 'kind', 'manage', 'slowly', 'tehran', 'criticize', 'exploit', 'constituent', 'budapest', 'eleventh', 'hotbed', 'assessment', 'abdullah', 'jeopardizes', 'taliban', 'finance', 'highlight', 'blockchain', 'railway', 'base', 'phase', 'extract', 'reunification', 'hardly', 'destiny', 'combatant', 'sectarian', 'serve', 'easy', 'undp', 'river', 'nanotechnology', 'somali', 'selection', 'jamaican', 'joy', 'criminal', 'political', 
'province', 'thematic', 'haunt', 'mtis', 'mr', 'listen', 'considerable', 'provide', 'suriname', 'thing', 'congolese', 'overseas', 'cybersphere', 'destroy', 'friendly', 'mbaye', 'moldovan', 'beginning', 'eradicate', 'structural', 'additional', 'exodus', 'conditionality', 'colombian', 'application', 'negotiating', 'federal', 'universally', 'culminate', 'ruler', 'supporter', 'resort', 'locomotive', 'period', 'narcotic', 'conditional', 'assertiveness', 'soulless', 'effectively', 'affair', 'genital', 'apparent', 'nutritional', 'dimension', 'opera', 'communicate', 'alleviate', 'hungarian', 'koroma', 'legitimacy', 'breach', 'intrinsic', 'advisory', 'promising', 'discovery', 'assault', 'distinguish', 'energize', 'fund', 'shared', 'coronavirus', 'guam', 'secular', 'alvarado', 'simn', 'belgian', 'easter', 'pyeongchang', 'patriotic', 'reciprocity', 'mdgs', 'saboteur', 'democracy', 'council', 'netherlands', 'dignity', 'door', 'newborn', 'crimea', 'egotism', 'archbishop', 'cote', 'model', 'risky', 'broker', 'gni', 'heat', 'morality', 'asia', 'committee', 'present', 'frustration', 'create', 'saleh', 'guarantee', 'superheroes', 'community', 'atomic', 'fukuyama', 'supercomputer', 'highway', 'homeland', 'armed', 'nga', 'ensure', 'austrian', 'jordan', 'maternal', 'heritage', 'destruction', 'ali', 'leopard', 'clean', 'indian', 'countless', 'negative', 'integrity', 'ceibal', 'hernndez', 'safeguard', 'cruel', 'income', 'isi', 'maputo', 'militancy', 'fmct', 'area', 'tension', 'contamination', 'divine', 'trillion', 'esquipulas', 'escoto', 'currency', 'talat', 'daca', 'unilaterally', 'namibian', 'realign', 'politics', 'allege', 'arabian', 'student', 'holocaust', 'clearly', 'learn', 'legislative', 'pain', 'dozen', 'list', 'planet', 'nursultan', 'privacy', 'somalis', 'secretary', 'organized', 'round', 'moral', 'warfare', 'lithuanian', 'chintana', 'courage', 'goal', 'navigation', 'postgraduate', 'awareness', 'old', 'incredible', 'union', 'fairer', 'alleviation', 'obligation', 'handbook', 
'mother', 'lose', 'low', 'growth', 'radical', 'impulse', 'curaao', 'bolder', 'xenophobic', 'language', 'repatriation', 'kinder', 'decent', 'ribbon', 'underline', 'optimal', 'spirit', 'yes', 'migration', 'siege', 'deployment', 'religious', 'landlocked', 'ratko', 'laundering', 'enemy', 'nvites', 'innovation', 'cpa', 'al', 'vulnerable', 'attention', 'periodic', 'dniester', 'icglr', 'crash', 'niger', 'icc', 'baku', 'wildlife', 'spirituality', 'patriotism', 'abdulaziz', 'single', 'deposit', 'denuclearization', 'consternation', 'lobo', 'feeling', 'chavez', 'franklin', 'interrelated', 'storage', 'yeasmin', 'precipitation', 'multilateral', 'occupier', 'potential', 'backdrop', 'secret', 'behave', 'imperialism', 'erga', 'hostage', 'blend', 'credit', 'sosa', 'plurality', 'supervision', 'town', 'roof', 'production', 'approximately', 'singh', 'promise', 'female', 'profile', 'candlelight', 'recreate', 'enrichment', 'crucial', 'arena', 'carlos', 'collapsed', 'visionary', 'correctly', 'equipment', 'productive', 'input', 'modest', 'corridor', 'mutual', 'tenure', 'battle', 'hemisphere', 'english', 'scientist', 'engage', 'indignation', 'athlete', 'dramatic', 'lake', 'positive', 'sanitary', 'toyako', 'believe', 'angola', 'mobile', 'jihadist', 'reappointing', 'purpose', 'adaptation', 'unpaid', 'affirm', 'exhibition', 'misunderstood', 'common', 'hunger', 'creativity', 'father', 'gun', 'rebellion', 'identity', 'assurance', 'spend', 'leadership', 'candidature', 'ninth', 'assassinate', 'pbc', 'macroeconomic', 'behalf', 'observe', 'triangular', 'indicator', 'vision', 'openness', 'training', 'prevent', 'guerrilla', 'nazi', 'prototype', 'ship', 'multiply', 'flow', 'sall', 'financial', 'truly', 'week', 'dushanbe', 'bloodshed', 'press', 'birth', 'indifference', 'stage', 'maintenance', 'damian', 'preserve', 'kindness', 'cotonou', 'sustained', 'norwegian', 'spade', 'cave', 'devour', 'greentree', 'coral', 'doctrine', 'renewal', 'sow', 'ysani', 'difficulty', 'consolidating', 'farming', 'answer', 
'csto', 'enhance', 'stateless', 'argument', 'liberated', 'flee', 'essentially', 'foetus', 'graduate', 'derna', 'successful', 'likelihood', 'anjouan', 'supreme', 'ethos', 'czechoslovakia', 'capacity', 'basket', 'pillar', 'honour', 'hit', 'governance', 'school', 'gggi', 'himalayan', 'nica', 'sanitation', 'shall', 'scholarship', 'normalization', 'uncompromising', 'translate', 'empower', 'salvador', 'origin', 'humala', 'attainment', 'weak', 'afresh', 'haram', 'ideal', 'concern', 'eccas', 'pakistani', 'livable', 'prospect', 'bsec', 'mckinsey', 'fonseca', 'nkrumah', 'geographically', 'efficiency', 'protected', 'renew', 'qatar', 'flooding', 'beset', 'outright', 'cease', 'erdogan', 'natural', 'crisis', 'mediate', 'tanganyika', 'unconditional', 'samaria', 'silk', 'expansion', 'report', 'insult', 'lower', 'fisherman', 'competition', 'aguirre', 'apartheid', 'putin', 'alongside', 'laden', 'institutional', 'mauritius', 'contributor', 'influence', 'stance', 'important', 'uranium', 'chauvinism', 'mauritanian', 'waste', 'agadez', 'ransom', 'balance', 'closure', 'prudence', 'mohtarma', 'scandal', 'cuban', 'luxembourg', 'promulgation', 'extremely', 'theme', 'cica', 'investor', 'address', 'disablement', 'traffic', 'bequeath', 'huntington', 'local', 'branch', 'kalapalo', 'confirm', 'timeliness', 'reminds', 'commune', 'passenger', 'respond', 'rational', 'habitat', 'chairman', 'simply', 'unjust', 'quantitative', 'capital', 'ambition', 'mohamed', 'empathic', 'gigawatts', 'forum', 'predictability', 'montreal', 'ecowas', 'seat', 'shortcoming', 'core', 'belize', 'root', 'slovenian', 'unicef', 'confrontation', 'mercy', 'irreplaceable', 'nairobi', 'spillover', 'empathy', 'fidel', 'viewpoint', 'maldives', 'kimberley', 'deserve', 'tremendous', 'almaty', 'save', 'morocco', 'livelihood', 'capita', 'reaffirm', 'consultation', 'recycle', 'event', 'initiator', 'unflinching', 'seller', 'shift', 'sideline', 'transaction', 'resolutely', 'uneven', 'rome', 'repeatedly', 'consistently', 'accra', 'haiti', 
'burundian', 'realize', 'mozambique', 'amendment', 'sdgs', 'paraguay', 'uprising', 'brotherly', 'change', 'economic', 'undong', 'carbon', 'prompt', 'static', 'photograph', 'unhcr', 'macky', 'launching', 'mandate', 'hesitation', 'te', 'anxiety', 'forthcoming', 'teach', 'expectation', 'decimated', 'leoneans', 'serbia', 'andean', 'interlink', 'wherewithal', 'enterprise', 'path', 'grouping', 'poland', 'ice', 'reassert', 'recently', 'coal', 'militia', 'tajikistan', 'improvement', 'contemporary', 'fethullah', 'mongolian', 'dakar', 'issoufou', 'baseline', 'asset', 'myanmar', 'rape', 'sandino', 'forcefully', 'energy', 'asylum', 'swiss', 'motivated', 'sustain', 'spoke', 'reiterate', 'dominate', 'bonn', 'dissatisfy', 'dictator', 'journalist', 'roughly', 'refugee', 'army', 'comorians', 'unemployment', 'iraq', 'latvia', 'strive', 'luxury', 'syrian', 'person', 'consume', 'ecological', 'oscillation', 'failed', 'npt', 'azerbaijan', 'territorial', 'tyranny', 'award', 'voiceless', 'king', 'decade', 'prove', 'speculator', 'fundamentalism', 'individual', 'tat', 'karadic', 'neglected', 'famine', 'rural', 'nelson', 'slight', 'protagonist', 'smart', 'glass', 'coffee', 'ivan', 'pertinant', 'togetherness', 'arrival', 'costly', 'algerian', 'truman', 'border', 'unrwa', 'panel', 'moscow', 'melting', 'sein', 'use', 'act', 'home', 'dependency', 'punish', 'overstate', 'patriot', 'drought', 'legitimate', 'wealth', 'index', 'tomorrow', 'rican', 'lesson', 'rally', 'monroe', 'cloak', 'entity', 'lending', 'nazarbayev', 'excellence', 'big', 'sincerely', 'mindset', 'vigilant', 'seventy', 'presidential', 'animosity', 'professor', 'amicable', 'help', 'bombing', 'fiscal', 'subregional', 'cyberattacks', 'jointly', 'artificial', 'wager', 'false', 'pathogen', 'greatness', 'extent', 'pretrial', 'sassou', 'equate', 'february', 'herzegovina', 'cholera', 'permanently', 'amia', 'sign', 'lift', 'regulate', 'jew', 'mature', 'rohingya', 'sirte', 'drastically', 'relief', 'beijing', 'need', 'neighbourhood', 'coup', 
'sica', 'shoulder', 'rejection', 'monastery', 'alike', 'overwhelming', 'map', 'utmost', 'candidacy', 'transform', 'hammer', 'olive', 'consumer', 'cluster', 'ideology', 'cynicism', 'participant', 'radioactive', 'global', 'forth', 'trilateral', 'lula', 'eclac', 'migratory', 'regime', 'entry', 'insecure', 'sudanese', 'idealism', 'chibok', 'comorian', 'blockade', 'mindanao', 'manuel', 'senegal', 'best', 'ethnic', 'deliberately', 'diplomat', 'renaissance', 'methodology', 'bamyan', 'interfere', 'unmit', 'disagreement', 'tax', 'authority', 'foremost', 'excision', 'isolationism', 'reinvent', 'sentence', 'assume', 'air', 'nearly', 'responsible', 'triple', 'impact', 'warm', 'economist', 'administration', 'fairness', 'acceptance', 'movement', 'renewable', 'court', 'ichiro', 'pole', 'overhaul', 'minister', 'massacre', 'implementation', 'love', 'centenary', 'amadou', 'maori', 'hope', 'globalized', 'concession', 'attendee', 'hopeful', 'primacy', 'parliament', 'attain', 'effective', 'bhutto', 'turkmenistan', 'size', 'confident', 'australia', 'karabakh', 'power', 'open', 'secondary', 'multiethnic', 'expulsion', 'stand', 'receive', 'bolivia', 'poverty', 'memory', 'saavedra', 'achieve', 'nicaraguan', 'roaring', 'crusade', 'family', 'cure', 'hamas', 'economics', 'zaragoza', 'speed', 'armament', 'alternative', 'league', 'illustrate']
# Keep only the speeches whose region is not Oceania.
not_oceania = speeches_df['Region Name'] != 'Oceania'
speeches_df = speeches_df[not_oceania]

# Print the number of remaining speeches for every region.
for region_name in speeches_df['Region Name'].unique():
    print(region_name, len(speeches_df[speeches_df['Region Name'] == region_name]))

# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
speeches_df_train, speeches_df_test = train_test_split(
    speeches_df,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

# Keep a candidate word only if its column sums to more than 20.
# NOTE(review): the sum runs over the full frame (train and test rows together).
selected_words = [
    word
    for word in features_most_used_words
    if speeches_df[word].sum(axis=0) > 20
]
print('the number of words we use in the end:', len(selected_words))
# Model inputs: five summary columns followed by the selected word columns.
# `predictable` names the target column.
features = [
    'word_count',
    'pos_sentiment',
    'neg_sentiment',
    'neu_sentiment',
    'average_sentence_length',
] + selected_words
predictable = ['Region Name']

# Candidate hyper-parameter values. They are handed to model_gridsearch(),
# which samples combinations with RandomizedSearchCV (a randomized search,
# not an exhaustive grid).
lgbm_grid_params = {
    'learning_rate': [0.005, 0.01, 0.1],
    'n_estimators': [16, 24, 28, 500, 1000, 2000, 3000],
    'min_child_samples': [1, 10, 50, 100],
    'num_leaves': [4, 8, 12, 16, 20],  # large num_leaves helps improve accuracy but might lead to over-fitting
    'boosting_type': ['gbdt'],  # for better accuracy -> try dart
    'max_bin': [255, 510, 1020],  # large max_bin helps improve accuracy but might slow down training progress
    'random_state': [42],
    'colsample_bytree': [0.64, 0.65, 0.8, 1],
    'subsample': [0.5, 0.7, 0.75, 0.8, 0.9, 1],
    # LightGBM only applies `subsample` when `subsample_freq` is positive; the
    # default of 0 disables bagging, which would make the `subsample` values
    # above indistinguishable from each other.
    'subsample_freq': [1],
    'reg_alpha': [0, 0.25, 0.5, 0.75, 1, 1.2],
    'reg_lambda': [0, 0.25, 0.5, 0.75, 1, 1.2, 1.4],
    'verbose': [-1],
}
# Baseline model with LightGBM's default hyper-parameters.
lgbm_model = lgb.LGBMClassifier()
n_splits = 5

# Evaluation before optimisation: cross-validated precision on the training
# split, then a run against the held-out test split.
k_fold_evaluation(speeches_df_train, lgbm_model, n_splits, features, predictable)
test_set_evaluation(speeches_df_train, speeches_df_test, lgbm_model, features, predictable)

# Randomized search for the best parameters (see model_gridsearch).
best_parameters = model_gridsearch(speeches_df_train, lgbm_model, n_splits, lgbm_grid_params, features, predictable)
print("The best parameters are:", best_parameters)

# Look at the increase of scores with the tuned parameters.
lgbm_model_optimised = lgb.LGBMClassifier(**best_parameters)
# k_fold_evaluation() fits its `model` argument but calls `.predict` on the
# module-level name `lgbm_model`. Without the rebinding below, the folds would
# be scored with the baseline model instead of the optimised one being fitted.
# Pointing the global at the optimised model makes the reported scores belong
# to the model under evaluation; the rebinding stays harmless if the helper is
# later changed to use `model.predict`.
lgbm_model = lgbm_model_optimised
k_fold_evaluation(speeches_df_train, lgbm_model_optimised, n_splits, features, predictable)

# Final evaluation on the unseen test set, using a fresh model built from the
# same best parameters.
lgbm_model_optimised_test = lgb.LGBMClassifier(**best_parameters)
(y_true, y_predicted) = test_set_evaluation(
    speeches_df_train,
    speeches_df_test,
    lgbm_model_optimised_test,
    features,
    predictable,
)
# Confusion matrix for the test-set predictions returned above.
# The class labels are collected (sorted) from the true and predicted values
# and passed to confusion_matrix explicitly, so the matrix rows/columns and the
# tick labels below are guaranteed to be in the same order and of the same
# length, whichever regions are actually present in the data.
regions = np.unique(
    np.concatenate([np.ravel(y_true), np.ravel(y_predicted)])
).tolist()
mat = confusion_matrix(y_true, y_predicted, labels=regions)
print(mat)

fig, ax = plt.subplots()
ax.imshow(mat, cmap='viridis', interpolation='nearest')
ax.set_xlabel('predicted value', fontsize=24)
ax.set_ylabel('true value', fontsize=24)

# One tick per class on both axes, labelled with the region names.
tick_positions = list(range(len(regions)))
ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(regions, rotation=45)
ax.set_yticklabels(regions)
ax.tick_params(axis='both', which='major', labelsize=34)

# Write each cell's count at its centre (x is the column, y is the row).
for (row, col), count in np.ndenumerate(mat):
    ax.text(col, row, count, ha="center", va="center", color="w", size=45)

# plt.savefig('cm.png', dpi=300)
plt.show()