forked from foxpcteam/AI-Marketer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSTP.py
114 lines (94 loc) · 5.69 KB
/
STP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import pandas as pd
from pycaret.clustering import *
import os
import numpy as np
import matplotlib.pyplot as plt
from util import p_title
from io import StringIO
import streamlit as st
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def STP(nav):
    """Render the customer-segmentation page of the Streamlit app.

    The page offers two flows:

    * **Demo** - pressing "Get Demo Data" loads ``./dataset/Mall_Customers.csv``,
      trains a PyCaret k-means model and shows the cluster plot, elbow
      plot, labelled rows and per-cluster attribute means.
    * **Upload** - the user uploads a CSV, optionally names the ID column
      and the number of clusters, and presses "Cluster". The trained model
      and labelled rows are kept in ``st.session_state`` (keys ``'kmeans'``
      and ``'kmeans_results'``) so they survive Streamlit reruns.

    Args:
        nav: The currently selected navigation label. Nothing is rendered
            unless it equals this page's title.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # NOTE(review): this label must match the navigation entry byte-for-byte
    # (including any invisible joiner characters in the emoji) - confirm
    # against the caller.
    page_title = '👨👩👦👦Customer Segmentation'
    if nav != page_title:
        return

    st.text('')
    p_title(page_title)
    st.text('')
    st.markdown(':white_check_mark:Segment your customers and target on the prospect')

    if st.button('Get Demo Data'):
        _stp_render_demo()
    else:
        _stp_render_upload()


def _stp_show_cluster_info(results, id_column):
    """Plot the mean of every numeric attribute per cluster as grouped bars."""
    stats = results.drop(columns=[id_column], errors='ignore')
    # numeric_only=True: text columns (e.g. a gender column) cannot be
    # averaged; newer pandas raises instead of silently dropping them.
    stats = stats.groupby('Cluster').mean(numeric_only=True).T
    stats = stats.rename_axis(None, axis=1)
    stats = stats.reset_index().rename(columns={'index': 'attributes'})
    cluster_columns = [col for col in stats.columns if col != 'attributes']
    fig = px.bar(stats, x="attributes", y=cluster_columns, barmode='group')
    st.plotly_chart(fig, use_container_width=True)


def _stp_render_demo():
    """Train k-means on the bundled mall-customers CSV and display the results."""
    data = pd.read_csv('./dataset/Mall_Customers.csv', encoding='unicode_escape')
    st.write(data.head(10))

    with st.spinner('Training model...'):
        setup(data, normalize=True, silent=True, ignore_features=["CustomerID"])
        st.session_state['kmeans'] = create_model('kmeans')
        st.session_state['kmeans_results'] = assign_model(st.session_state['kmeans'])

    model = st.session_state['kmeans']
    results = st.session_state['kmeans_results']

    plot_model(model, plot='cluster', display_format="streamlit")
    st.write('The following graph indicates the optimum numbers of cluster, based on the number of k (k=?), you should input the number of k in "Number of Clusters you would like"')
    st.title('Elbow Graph')
    col1, _ = st.columns(2)
    with col1:
        plot_model(model, plot='elbow', display_format='streamlit')

    # Inspect clusters
    st.title('Clusters')
    st.dataframe(results)

    st.title("Cluster Info")
    _stp_show_cluster_info(results, "CustomerID")


def _stp_render_upload():
    """Cluster a user-uploaded CSV and display / offer the results for download."""
    uploaded_file = st.file_uploader("Choose a file")
    id_name = st.text_input("Enter the column name of the customers' ID (Optional)", 'CustomerID')
    selected_k = st.text_input("Number of Clusters you would like (If you don't know, you could leave it blank and we will determine the optimal number of clusters in the elbow graph and you could use it to rerun again)", '')

    if uploaded_file is None:
        return

    data = pd.read_csv(uploaded_file, encoding='unicode_escape')
    st.write(data.head(10))

    # Derived on every rerun (not only when "Cluster" is pressed) because the
    # results section below needs it after e.g. a download-button rerun.
    id_column = str(id_name).strip()

    if st.button('Cluster'):
        # Validate the requested k before spending time on training.
        k_text = selected_k.strip()
        num_clusters = None
        if k_text:
            try:
                num_clusters = int(k_text)
            except ValueError:
                num_clusters = 0
            if num_clusters < 1:
                st.error("Number of Clusters must be a positive whole number")
                return

        with st.spinner('Training model...'):
            # The ID column is optional; only ask PyCaret to ignore it when
            # a name was actually supplied.
            ignored = [id_column] if id_column else []
            try:
                setup(data, normalize=True, silent=True, ignore_features=ignored)
            except Exception:
                # Do not continue to create_model: it would train against a
                # failed or stale experiment.
                st.error("Wrong Input")
                return

            if num_clusters is None:
                st.session_state['kmeans'] = create_model('kmeans')
            else:
                st.session_state['kmeans'] = create_model('kmeans', num_clusters=num_clusters)
            st.session_state['kmeans_results'] = assign_model(st.session_state['kmeans'])

    if 'kmeans_results' not in st.session_state:
        return

    model = st.session_state['kmeans']
    results = st.session_state['kmeans_results']

    plot_model(model, plot='cluster', display_format="streamlit")
    col1, _ = st.columns(2)
    with col1:
        st.text('The following graph indicates the optimum numbers of cluster, based on the number of k (k=?), you should input the number to Number of Clusters you would like')
        plot_model(model, plot='elbow', display_format='streamlit')

    # Inspect clusters
    st.title('Clusters')
    st.dataframe(results)

    @st.cache
    def convert_df(df):
        # Cached so the CSV is not re-encoded on every rerun.
        return df.to_csv().encode('utf-8')

    csv = convert_df(results)
    st.download_button(
        label="Download data as CSV",
        data=csv,
        file_name='cluster.csv',
        mime='text/csv',
    )

    # Cluster info
    with st.spinner("Analyzing"):
        st.title("Cluster Info")
        _stp_show_cluster_info(results, id_column)