This repository has been archived by the owner on Jan 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathpathing_demo.py
226 lines (111 loc) · 4.8 KB
/
pathing_demo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
#!/usr/bin/env python
# coding: utf-8
# # Pathing Demo notebook
# ## Setup
# In[ ]:
# get_ipython().run_line_magic('matplotlib', 'inline')
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from textwrap import wrap
from pathutils import (get_popular_urls, funnel_in_outs,funnel_stats, sankey_funnel,
frequent_funnel, analyze_clicks, analyze_traffic, utils, manage_resolutions,
url_regex_resolver, analyze_timing)
# In[ ]:
# Folder containing the Hauser export to analyze. To run against your own data,
# replace the sample folder below with the path to your Hauser output, e.g.:
#HAUSERDIR = "<Path to your Hauser folder>"
HAUSERDIR = "sampledata"
# `LIMITROWS` is the row limit handed to the pathutils helpers used below
# (popular-URL counts, top funnels, funnel stats and ingress/egress counts).
# NOTE(review): 0 presumably means "no limit" -- confirm against pathutils.
LIMITROWS = 0
# Hauser is an open source tool you can use to export data from FullStory into your filesystem: https://github.com/fullstorydev/hauser
# ## Load Data Export data into a dataframe
# In[ ]:
# Load the export found under HAUSERDIR into `dffull`. `navigate_only=False` is
# passed explicitly; presumably this keeps non-navigation events (e.g. clicks),
# which the click-type and timing cells further down run against -- confirm in
# pathutils.analyze_traffic.
dffull = analyze_traffic.get_hauser_as_df(HAUSERDIR, navigate_only=False)
# ## Inspect your dataframe(s)
# You can find Data Export field descriptions on FullStory's API reference site: https://developer.fullstory.com/get-data-export
# In[ ]:
# Apply the pathutils preprocessing step and rebind `dffull` to its result.
dffull = utils.preproc_events(dffull)
# Preview the first 15 rows. As a bare expression this only renders in a
# notebook; when the file is run as a plain script the value is discarded.
dffull.head(15)
# ## Filter out any events that aren't navigation events
# In[ ]:
# Optional: you can also filter your dataset to only include sessions with clicks of a certain type
#dffull = analyze_clicks.filter_dataset_by_clicktype(dffull, "rage")
# `df` is the navigation-only frame used by the URL and funnel cells below;
# `dffull` stays bound to the full frame for the click and timing cells.
df = analyze_clicks.remove_non_navigation(dffull)
# Notebook-only preview, same caveat as above.
df.head(15)
# ## Plot a diagram of the most visited URLs
# In[ ]:
# `useResolvedUrls` is forwarded to every pathutils call below. It stays False
# until the final cell of this file sets it to True to plot resolved URLs.
useResolvedUrls = False
url_counts = get_popular_urls.get_popular(df, useResolvedUrls, LIMITROWS)
TOPCOUNTS = 20 # limit output rows
# NOTE(review): the meaning of the trailing positional `False` is defined by
# analyze_traffic.plot_counts_by_freq and is not visible here -- confirm.
analyze_traffic.plot_counts_by_freq(url_counts, TOPCOUNTS, "URL Counts", False)
# ## Show common funnels that include the specified URL
# In[ ]:
# URL that the reported funnels must include (per the heading above).
TESTURL = "https://www.oodatime.com/cart"
# Passed to get_top_funnels_df; presumably the number of steps per funnel -- confirm.
FUNNELLEN = 3
# How many of the top funnels to print.
NUMFUNNELSTOSHOW = 4
top_funnels = frequent_funnel.get_top_funnels_df(TESTURL, FUNNELLEN, useResolvedUrls, df, LIMITROWS)
frequent_funnel.print_top_funnel_counts(top_funnels, NUMFUNNELSTOSHOW)
# ## Show conversion statistics for the specified funnel
# In[ ]:
# The funnel under test: collection page, then a product page, then the cart.
# This list is reused by every funnel, session-link, timing and in/out cell
# that follows.
test_funnel=["https://www.oodatime.com/collections/mens",
"https://www.oodatime.com/collections/mens/products/blue-watch",
"https://www.oodatime.com/cart"]
funnel_counts = funnel_stats.get_funnel_stats(df, test_funnel, useResolvedUrls, LIMITROWS)
# NOTE(review): here the count limit is 0 and the last flag is True, unlike the
# URL-count plot; what those values select is defined in plot_counts_by_freq.
analyze_traffic.plot_counts_by_freq(funnel_counts, 0, "Funnel Counts", True)
# ## Plot sankey diagram for the specified funnel
# In[ ]:
# First argument is used as the diagram title (presumably); `cutoff=4` is
# passed through to sankey_funnel.plot_funnel -- confirm its meaning there.
sankey_funnel.plot_funnel("Blue Watch Funnel", df, test_funnel, useResolvedUrls, cutoff=4)
# ## Generate session links for the specified funnel
# **Note:** This only works if you are a FullStory user (use your Org ID)
# In[ ]:
# Replace with your own FullStory Org ID when running against your data.
ORGID = "NHQ5G"
STAGING = False
# NOTE(review): the trailing positional `True, 5` are forwarded unchanged;
# their meaning is defined by analyze_traffic.get_sessions_for_funnel -- confirm.
sessions = analyze_traffic.get_sessions_for_funnel(df, test_funnel, useResolvedUrls, ORGID, STAGING, True, 5)
# Print one session entry per line.
for s in sessions:
    print(s)
# ## Generate session links for a funnel that includes a specified click type
# In[ ]:
# ORGID and STAGING are assigned again (same values) so this cell can be run
# on its own in a notebook.
ORGID = "NHQ5G"
STAGING = False
clicktype = "rage"
# This call is given `dffull` rather than the navigation-only `df`; presumably
# the click events are needed to match `clicktype` -- confirm in pathutils.
sessions = analyze_traffic.get_sessions_for_funnel_and_click(dffull, test_funnel, clicktype, useResolvedUrls, ORGID, STAGING, True, 5)
for s in sessions:
    print(s)
# ## Print timing stats for a funnel
# In[12]:
# Timing is computed from `dffull` (the full event frame), not the
# navigation-only `df`. The result is reused by the two histogram cells below.
funtimes = analyze_timing.get_timing_for_funnel(dffull, test_funnel, useResolvedUrls)
analyze_timing.print_timing_averages(test_funnel, funtimes)
# ## Generate timing histogram for 1 step of the funnel
# In[13]:
# Third argument 0: per the heading, a single funnel step is plotted
# (presumably the step at index 0 -- confirm in analyze_timing).
analyze_timing.plot_timing_data(test_funnel, funtimes, 0)
# ## Generate timing histogram for all steps of the funnel
# In[14]:
# Third argument -1: per the heading, this requests histograms for all steps.
analyze_timing.plot_timing_data(test_funnel, funtimes, -1)
# ## Generate inflow and outflow counts for the specified funnel
# In[15]:
# get_in_outs returns a pair, unpacked here as (ingress, egress); each half is
# plotted separately in the next two cells.
ingress, egress = funnel_in_outs.get_in_outs(df, test_funnel, useResolvedUrls, LIMITROWS)
# ## Plot inflow statistics
# In[16]:
# Count limit 0 and trailing flag False are passed as-is; see
# analyze_traffic.plot_counts_by_freq for what they select.
analyze_traffic.plot_counts_by_freq(ingress, 0, "Ingress", False)
# ## Plot outflow statistics
# In[17]:
analyze_traffic.plot_counts_by_freq(egress, 0, "Egress", False)
# ## Plot most visited URLs again (for illustration purposes)
# In[18]:
# Same call as the first URL-count plot, repeated here so the unresolved
# result can be compared with the resolved plot at the end of this section.
url_counts = get_popular_urls.get_popular(df, useResolvedUrls, LIMITROWS)
TOPCOUNTS = 20
analyze_traffic.plot_counts_by_freq(url_counts, TOPCOUNTS, "URL Counts", False)
# ## Display current list of URL resolution rules
# In[19]:
manage_resolutions.show_rules()
# ## Add URL resolution rule
# In[20]:
# First argument is a regex matching the five colour-specific watch product
# paths; second is the single placeholder path they should resolve to
# (presumably pattern -> replacement; confirm in manage_resolutions.add_rule).
manage_resolutions.add_rule("/products/(black|blue|red|gold|rainbow)-watch","/products/<any-watch>")
# ## Display URL resolution rules again
# In[21]:
# Shown a second time so the newly added rule is visible.
manage_resolutions.show_rules()
# ## Plot most visited resolved URLs
# In[22]:
# Flip the flag that is forwarded to pathutils so this plot uses resolved URLs.
# Note this rebinds the module-level name: any cell re-run after this point
# will also see useResolvedUrls == True.
useResolvedUrls = True
url_counts = get_popular_urls.get_popular(df, useResolvedUrls, LIMITROWS)
TOPCOUNTS = 20
analyze_traffic.plot_counts_by_freq(url_counts, TOPCOUNTS, "URL Counts", False)