-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
256 lines (206 loc) · 11.2 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import streamlit as st
import preprocessor
import helper
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# Turn off Streamlit's 'deprecation.showPyplotGlobalUse' warning for the whole app.
st.set_option('deprecation.showPyplotGlobalUse', False)
@st.cache_data(show_spinner=False, persist="disk")
def load_data(uploaded_file):
    """Decode an uploaded chat export and hand it to the preprocessor.

    The result is cached by Streamlit (``st.cache_data`` with on-disk
    persistence), so re-running the script with the same upload does not
    preprocess the chat again.

    Args:
        uploaded_file: Object exposing ``getvalue()`` that returns the raw
            file bytes (here, the value of ``st.sidebar.file_uploader``).

    Returns:
        Whatever ``preprocessor.preprocess`` returns for the decoded text.

    Raises:
        UnicodeDecodeError: If the upload is not valid UTF-8.
    """
    raw_bytes = uploaded_file.getvalue()
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark, so a BOM never becomes part of the first line of chat text.
    text = raw_bytes.decode("utf-8-sig")
    return preprocessor.preprocess(text)
def load_css(css_path):
    """Inject the stylesheet stored at *css_path* into the Streamlit page.

    The file content is wrapped in a ``<style>`` tag and emitted through
    ``st.markdown`` with ``unsafe_allow_html=True``.

    Args:
        css_path: Path of the CSS file to read.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Explicit encoding so the result does not depend on the platform's
    # locale default.
    with open(css_path, "r", encoding="utf-8") as css_file:
        css = css_file.read()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)
# Sentinel entry of the user select box meaning "do not filter by user".
_OVERALL_USERS = "Overall Users"
# First entry of the feedback select box; selecting it means "no answer yet".
_FEEDBACK_PLACEHOLDER = "Please choose an option"


def _show_figure(fig):
    """Render a matplotlib figure in the page and release it afterwards."""
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # script rerun would leave another figure behind in the server process.
    plt.close(fig)


def _render_intro():
    """Render the stylesheet, logo, title, welcome text and guidelines."""
    load_css("style.css")
    st.image("images/logo.png", use_column_width=True)
    st.title('WhatsApp Chat Analyzer')
    st.write("""
Welcome to **Wanalyicia** - Your Personal WhatsApp Chat Insight Engine!
Dive deep into your chat narratives and discover patterns you never noticed before. From sentiment landscapes to user engagement metrics,
Wanalyicia unveils a spectrum of data-driven revelations from your chats.
""")
    st.write('**Guidelines to Begin:**')
    st.write("""
1. Export your WhatsApp chat Without Media.
2. Upload the exported chat file.
3. Choose the user and the type of analysis.
4. View insights and charts based on your chat data.
""")
    st.write("---")


def _render_user_statistics(selected_user, df):
    """Show the nine counters returned by ``helper.fetch_stats``."""
    (total_messages, total_words, total_media_messages, total_url,
     total_emoji, deleted_message, edited_messages, shared_contact,
     shared_location) = helper.fetch_stats(selected_user, df)
    stats = [
        ("Total Messages Shared", total_messages),
        ("Total Words Shared", total_words),
        ("Total Media Shared", total_media_messages),
        ("Total Link Shared", total_url),
        ("Total Emoji Shared", total_emoji),
        ("Total Deleted Message", deleted_message),
        ("Total Edited Message", edited_messages),
        ("Total Contact Shared", shared_contact),
        ("Total Location Shared", shared_location),
    ]
    for position, (label, value) in enumerate(stats):
        # Separator between counters, none before the first or after the last.
        if position:
            st.write("---")
        st.markdown(f"### {label}: ")
        st.write(f"<div class='big-font'>{value}</div>", unsafe_allow_html=True)


def _render_sentiment_analysis(selected_user, df):
    """Show the sentiment distribution and, if dated, its trend over time."""
    if selected_user != _OVERALL_USERS:
        df = df[df['username'] == selected_user]
    # Work on a private copy: adding a column to a filtered slice triggers
    # pandas' SettingWithCopyWarning and would otherwise touch the caller's frame.
    df = df.copy()
    df['Sentiment'] = df['message'].apply(helper.extract_sentiment)

    st.subheader("Sentiment Distribution")
    sentiment_distribution = df['Sentiment'].value_counts()
    fig = px.bar(sentiment_distribution, labels={'index': 'Sentiment', 'value': 'Count'})
    st.plotly_chart(fig)
    st.write("---")

    if 'date' in df.columns:
        st.subheader("Sentiment Trends Over Time")
        # One row per (date, sentiment) pair with the number of messages.
        sentiment_over_time = df.groupby(['date', 'Sentiment']).size().reset_index(name='Counts')
        fig, ax = plt.subplots(figsize=(12, 8))
        sns.lineplot(data=sentiment_over_time, x='date', y='Counts', hue='Sentiment', ax=ax)
        _show_figure(fig)


def _render_nlp_analysis(selected_user, df):
    """Show TF-IDF top words and LDA topics for the selected messages."""
    messages = df['message']
    # The NLP helpers only receive messages, so the per-user filter has to be
    # applied here (same rule the sentiment analysis follows).
    if selected_user != _OVERALL_USERS:
        messages = df.loc[df['username'] == selected_user, 'message']

    st.subheader("TF-IDF Analysis")
    top_words = helper.perform_tfidf_analysis(messages)
    st.write("Top 5 words based on TF-IDF scores:", top_words)

    st.subheader("LDA Topic Modeling")
    for topic in helper.perform_lda_analysis(messages, 5):
        st.write(topic)


def _render_user_activity(selected_user, df):
    """Show activity charts: busiest users or one user's timeline, then maps.

    NOTE(review): the source this was restored from had lost its indentation;
    the week/month/heatmap sections are assumed to be shown for both the
    overall and the single-user case - confirm against the intended layout.
    """
    if selected_user == _OVERALL_USERS:
        top, bottom = helper.most_least_busy_users(df)
        st.subheader('Most Active Users')
        st.bar_chart(top)
        st.write("---")
        st.subheader('Least Active Users')
        st.bar_chart(bottom)
        st.write("---")
    else:
        st.subheader("User Activity Over Time")
        user_activity = helper.user_activity_over_time(selected_user, df)
        st.line_chart(user_activity)
        st.write("---")

    st.subheader("Week Activity Map")
    week_activity_data = helper.week_activity_map(selected_user, df)
    fig, ax = plt.subplots(figsize=(8, 6))
    week_activity_data.sort_index().plot(kind='bar', ax=ax)
    ax.set_title("Activity Throughout the Week")
    ax.set_ylabel("Number of Messages")
    ax.set_xlabel("Day of the Week")
    _show_figure(fig)
    st.write("---")

    st.subheader("Month Activity Map")
    month_activity_data = helper.month_activity_map(selected_user, df)
    fig, ax = plt.subplots(figsize=(8, 6))
    month_activity_data.sort_index().plot(kind='bar', ax=ax)
    ax.set_title("Activity Throughout the Month")
    ax.set_ylabel("Number of Messages")
    ax.set_xlabel("Month")
    _show_figure(fig)
    st.write("---")

    st.subheader("Activity Heatmap")
    heatmap_data = helper.activity_heatmap(selected_user, df)
    fig, ax = plt.subplots(figsize=(12, 8))
    sns.heatmap(heatmap_data, cmap='viridis', annot=True, fmt=".0f", ax=ax)
    ax.set_title("Activity Heatmap: Day vs. Period")
    _show_figure(fig)


def _render_word_emoji_analysis(selected_user, df):
    """Show the word cloud, the top-5 emoji table and a matching pie chart."""
    wc_array = helper.create_wordcloud(selected_user, df)
    st.subheader("Word Cloud")
    st.image(wc_array, caption="Word Cloud of Chat", use_column_width=True)

    emoji_df = helper.emoji_helper(selected_user, df)
    st.subheader("Emoji Analysis")
    top_emojis = emoji_df.head(5)
    st.dataframe(top_emojis)
    # Skip the pie chart when there is nothing to plot.
    if not emoji_df.empty:
        fig, ax = plt.subplots()
        ax.pie(top_emojis['Frequency'], labels=top_emojis['Emoji'], autopct='%1.1f%%')
        _show_figure(fig)


def _render_timeline_analysis(selected_user, df):
    """Show the monthly and daily message timelines as line charts."""
    monthly_timeline = helper.monthly_timeline(selected_user, df)
    daily_timeline = helper.daily_timeline(selected_user, df)
    st.subheader("Monthly Timeline")
    st.line_chart(monthly_timeline)
    st.write("---")
    st.subheader("Daily Timeline")
    st.line_chart(daily_timeline)


def _render_comparative_analysis(df, user_list):
    """Let the user pick users and a date range, then chart their activity."""
    st.subheader("Comparative Analysis between Users")
    users_to_compare = st.multiselect("Select users for comparison", user_list)
    st.write("---")
    if not users_to_compare:
        return

    min_date = df["date"].min().date()
    max_date = df["date"].max().date()
    selected_range = st.slider("Select Time Range", min_date, max_date, (min_date, max_date))
    st.write("---")
    if st.sidebar.button("Show Comparative Analysis", key="comparative_analysis_button"):
        start_date, end_date = selected_range
        users_activity = helper.perform_comparative_analysis(
            df, users_to_compare, start_date, end_date)
        st.bar_chart(users_activity)


def _render_analysis(df):
    """Render the user/analysis pickers and the analysis chosen by the user."""
    user_list = [_OVERALL_USERS, *sorted(df['username'].unique().tolist())]

    # Free-text search narrows the options offered by the user select box.
    search_user = st.sidebar.text_input("Search for a User", "")
    needle = search_user.lower()
    filtered_users = [user for user in user_list if needle in user.lower()]
    selected_user = st.sidebar.selectbox("Select The User", filtered_users)
    if selected_user is None:
        # The select box has no options when the search matches nobody;
        # stop here instead of running the analyses with no user.
        st.sidebar.warning("No user matches your search.")
        return

    # "Comparative Analysis" is only offered when no single user is selected.
    analysis_menu = ["User Statistics", "Sentiment Analysis", "Advanced NLP Analysis"]
    if selected_user == _OVERALL_USERS:
        analysis_menu.append("Comparative Analysis")
    analysis_menu += ["User Activity", "Word and Emoji Analysis", "Timeline Analysis"]

    st.sidebar.header("Analysis Options")
    choice = st.sidebar.selectbox("Select Analysis Type", analysis_menu, index=0)

    if choice == "Comparative Analysis":
        # Has its own trigger button, so "Start Analysis" is not shown.
        _render_comparative_analysis(df, user_list)
    elif st.sidebar.button("Start Analysis"):
        renderers = {
            "User Statistics": _render_user_statistics,
            "Sentiment Analysis": _render_sentiment_analysis,
            "Advanced NLP Analysis": _render_nlp_analysis,
            "User Activity": _render_user_activity,
            "Word and Emoji Analysis": _render_word_emoji_analysis,
            "Timeline Analysis": _render_timeline_analysis,
        }
        renderers[choice](selected_user, df)


def _render_feedback_footer():
    """Render the sidebar feedback form and the copyright line."""
    st.sidebar.markdown("---")
    st.sidebar.header("We Value Your Feedback")
    was_helpful = st.sidebar.selectbox(
        "Did you find our insights useful?",
        [_FEEDBACK_PLACEHOLDER, "Yes, very helpful", "Somewhat helpful", "Not helpful"])
    if was_helpful != _FEEDBACK_PLACEHOLDER:
        # The comment box is shown for the user's benefit only; its text is
        # not read, stored or sent anywhere by this function.
        st.sidebar.text_area("Kindly share any additional comments or suggestions...")
        if st.sidebar.button("Submit Feedback"):
            st.sidebar.success("Thank you for sharing your thoughts!")
    st.sidebar.markdown("---")
    st.sidebar.markdown("© 2023 Wanalyicia. All rights reserved.")


def main():
    """Build the whole page: intro, upload, selected analysis and footer.

    Nothing is analysed until a chat export has been uploaded through the
    sidebar; the upload is parsed via ``load_data`` and passed to the
    analysis renderers.

    NOTE(review): the source this was restored from had lost its indentation;
    the feedback/footer section is assumed to be shown regardless of whether
    a file has been uploaded - confirm against the intended layout.
    """
    _render_intro()
    st.sidebar.image("images/applogo.png", use_column_width=True)
    uploaded_file = st.sidebar.file_uploader("Upload Exported Chat", type=["txt", "csv"])
    if uploaded_file:
        _render_analysis(load_data(uploaded_file))
    _render_feedback_footer()
if __name__ == '__main__':
    # Page layout is configured before main() renders anything. The
    # 'deprecation.showPyplotGlobalUse' option is already set once at import
    # time right after the imports, so it is not set a second time here.
    st.set_page_config(layout="wide")
    main()