app.py
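# Net Guardian: a Streamlit app that evaluates pre-trained scikit-learn models
# for network anomaly detection on the UNSW-NB15 test set.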
import time
import joblib
import numpy as np
import pandas as pd
import streamlit as st
from sklearn import metrics
import matplotlib.pyplot as plt
st.title('Net Guardian :shield: :globe_with_meridians:')
st.write('''#### A Machine Learning App for Network Anomaly Detection''')
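# Sidebar: algorithm choice, test-set sample fraction, and reference links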
st.sidebar.title('Parameter Selection :')
model = st.sidebar.selectbox('Select a Machine Learning Algorithm :',
                             ['Logistic Regression',
                              'Decision Tree Classifier',
                              'Random Forest Classifier',
                              'Naive Bayes Classifier'])
size = st.sidebar.selectbox('Choose size for DataSet :', [0.10, 0.25, 0.30, 0.5, 1.0])
st.sidebar.subheader('Details :')
st.sidebar.link_button(label = 'DataSet', url = 'https://www.kaggle.com/datasets/mrwellsdavid/unsw-nb15/data?select=UNSW_NB15_training-set.csv')
st.sidebar.subheader('Research Papers :')
st.sidebar.link_button(label = 'Paper 1', url = 'https://drive.google.com/file/d/1XwvH6DyBif2o53CtLHoo33xaumjdJfK3/view?usp=share_link')
st.sidebar.link_button(label = 'Paper 2', url = 'https://drive.google.com/file/d/1Ti6FB5fSDx8GhPIZ8vyBHUxNvmurgO3A/view?usp=share_link')
st.sidebar.link_button(label = 'Paper 3', url = 'https://drive.google.com/file/d/1T3IE3xE8I7VouWwF2Bpu40F_XpfiCcX2/view?usp=share_link')
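# Loads the selected pre-trained model and evaluates it on a sample of the UNSW-NB15 test set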
def make_prediction(model, size):
    # Load the pre-trained model that matches the sidebar selection
    if model == 'Logistic Regression':
        ml_model = joblib.load('Log_Reg.joblib')
    elif model == 'Decision Tree Classifier':
        ml_model = joblib.load('Dec_Tree.joblib')
    elif model == 'Random Forest Classifier':
        ml_model = joblib.load('Random_Forest.joblib')
    elif model == 'Naive Bayes Classifier':
        ml_model = joblib.load('Naive_Bayes.joblib')

    # Sample the requested fraction of the test set and keep only numeric columns
    test = pd.read_csv('UNSW_NB15_testing-set.csv')
    test = test.sample(frac = size, random_state = 42)
    object_cols = test.select_dtypes(include = 'object').columns
    test.drop(columns = object_cols, inplace = True)
    x = test.drop('label', axis = 1)
    y = test['label']

    # Score the model on the sampled data
    y_preds = ml_model.predict(x)
    acc = ml_model.score(x, y)
    prec_score = metrics.precision_score(y_pred = y_preds, y_true = y)
    recall_score = metrics.recall_score(y_pred = y_preds, y_true = y)
    f1_score = metrics.f1_score(y_pred = y_preds, y_true = y)
    return acc, prec_score, recall_score, f1_score
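# Evaluates all four pre-trained models on the same test sample and displays the comparison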
def compare_models():
    # Uses the sample fraction `size` selected in the sidebar
    test = pd.read_csv('UNSW_NB15_testing-set.csv')
    test = test.sample(frac = size, random_state = 42)
    object_cols = test.select_dtypes(include = 'object').columns
    test.drop(columns = object_cols, inplace = True)
    x = test.drop('label', axis = 1)
    y = test['label']

    # Get predictions from every pre-trained model on the same sample
    y_pred_dict = {}
    models = {
        'Logistic Regression': joblib.load('Log_Reg.joblib'),
        'Decision Tree': joblib.load('Dec_Tree.joblib'),
        'Random Forest': joblib.load('Random_Forest.joblib'),
        'Naive Bayes': joblib.load('Naive_Bayes.joblib')}
    for model_name, clf in models.items():
        y_pred_dict[model_name] = clf.predict(x)

    # Full metric table for every model
    evaluation_results = []
    for model_name, y_pred in y_pred_dict.items():
        accuracy = metrics.accuracy_score(y, y_pred)
        precision = metrics.precision_score(y, y_pred)
        recall = metrics.recall_score(y, y_pred)
        f1 = metrics.f1_score(y, y_pred)
        evaluation_results.append({
            'Model': model_name,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1-score': f1})

    # Accuracy kept numeric so sorting and the bar chart work on values rather than strings
    accuracy_results = []
    for model_name, y_pred in y_pred_dict.items():
        accuracy = metrics.accuracy_score(y, y_pred) * 100
        accuracy_results.append({
            'Model': model_name,
            'Accuracy': accuracy})

    df_results = pd.DataFrame(evaluation_results)
    st.dataframe(df_results)

    df_accuracy = pd.DataFrame(accuracy_results)
    df_accuracy_sorted = df_accuracy.sort_values(by = 'Accuracy', ascending = False)
    st.subheader('Accuracy Sorted :')
    # Format as percentages only for display; the numeric column still drives the chart
    st.dataframe(df_accuracy_sorted.assign(Accuracy = df_accuracy_sorted['Accuracy'].map('{:.2f}%'.format)))
    st.subheader('Graphical Analysis :')
    st.bar_chart(df_accuracy_sorted.set_index('Model'))
st.write('''---''')
st.write('''##### Selected Model : {}'''.format(model))
st.write('''##### Selected Dataset Size : {:.0f} %'''.format(size * 100))
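# Main area: two tabs, one to run the selected model and one to compare all models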
with st.container():
    tab1, tab2 = st.tabs(['Run Tests', 'Compare'])
    with tab1:
        ok = st.button(label = 'Run Selected Model')
        if ok:
            with st.spinner('Running the Model ...'):
                time.sleep(3)
                accuracy, prec_score, recall_score, f1_score = make_prediction(model, size)
                data = {'Precision Score' : [f"{prec_score * 100:.2f} %"],
                        'Recall Score' : [f"{recall_score * 100:.2f} %"],
                        'F1 Score' : [f"{f1_score * 100:.2f} %"]}
                st.write('''#### Accuracy : {} %'''.format(round(accuracy * 100, 2)))
                st.dataframe(data)
    with tab2:
        ok1 = st.button(label = 'Compare All Models')
        if ok1:
            with st.spinner('Generating Insights from Models .... '):
                time.sleep(3)
                st.subheader('Analysis of all Models : ')
                compare_models()