-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpage_paths.py
203 lines (163 loc) · 6.92 KB
/
page_paths.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: utf-8 -*-
import csv
import urllib
from apiclient.discovery import build
from oauth2client.service_account import ServiceAccountCredentials
class PagePaths(object):
    """Class responsible for managing connection to the Google Analytics and
    fetching page paths and connections between them.

    Besides querying the Analytics Reporting API V4, the class offers small
    helpers to print a raw response and to store/load results as CSV files.

    Args:
        view_id(string): ID of the GA view.
        key_file_location(string): path to the client credentials JSON file.
        start_date(string, optional): start date used in report, defaults to 'yesterday'.
    """

    # OAuth scopes requested for the service account credentials.
    scopes = ['https://www.googleapis.com/auth/analytics.readonly']
    # Class-level fallbacks; both are overwritten per instance in __init__.
    view_id = ''
    start_date = ''

    def __init__(self, view_id, key_file_location, start_date='yesterday'):
        self.view_id = view_id
        self.start_date = start_date

        credentials = ServiceAccountCredentials.from_json_keyfile_name(
            key_file_location, self.scopes)
        self.analytics = build('analyticsreporting', 'v4', credentials=credentials)

    def filter_batch(self, response, paths=None, connections=None):
        """Parses and filters the Analytics Reporting API V4 response.

        Rows with fewer than three dimensions, rows whose previous or current
        path is rejected by ``filter_path`` and rows where both paths are equal
        after filtering are skipped.

        Args:
            response(dict): an Analytics Reporting API V4 response.
            paths(list, optional): already known page paths; extended in place.
                A fresh list is created when omitted.
            connections(list, optional): already known connections; extended
                in place. A fresh list is created when omitted.

        Returns:
            list: provided paths list plus new page paths from the current batch.
            list: provided connections list plus new connections, each being
                ``[previous_index, current_index, count]``.
        """
        # Fresh lists per call: mutable defaults would be shared between calls.
        if paths is None:
            paths = []
        if connections is None:
            connections = []

        # Path -> index lookup so each row costs O(1) instead of a list scan.
        # setdefault keeps the first occurrence, matching list.index().
        index_of = {}
        for position, known_path in enumerate(paths):
            index_of.setdefault(known_path, position)

        for report in response.get('reports', []):
            for row in report.get('data', {}).get('rows', []):
                dimensions = row.get('dimensions', [])
                if len(dimensions) < 3:
                    continue

                # dimensions[0] is the date; [1] and [2] are the page paths.
                prev = self.filter_path(dimensions[1])
                current = self.filter_path(dimensions[2])
                if prev is None or current is None or prev == current:
                    continue

                # Read the count before touching `paths`, so a malformed row
                # cannot leave behind paths without a matching connection.
                metrics = row.get('metrics', [])
                count = int(metrics[0]['values'][0])

                endpoints = []
                for path in (prev, current):
                    if path not in index_of:
                        index_of[path] = len(paths)
                        paths.append(path)
                    endpoints.append(index_of[path])

                connections.append([endpoints[0], endpoints[1], count])

        return paths, connections

    def filter_path(self, path):
        """Removes GET parameters from path and normalizes its slashes.

        The returned path always starts and ends with '/'.

        Args:
            path(string)

        Returns:
            string|None: filtered path or None if invalid (paths starting
                with 'http' or '/http' are treated as invalid).
        """
        if path.startswith(('/http', 'http')):
            return None

        # Drop everything from the first '?' on.
        path = path.split('?', 1)[0]

        if not path.startswith('/'):
            path = '/' + path
        if not path.endswith('/'):
            path += '/'

        return path

    def get_batch(self, from_index=0):
        """Queries the Analytics Reporting API V4.

        Args:
            from_index(int, optional): offset for current batch, defaults to 0.
                It is sent to the API as the string ``pageToken``.

        Returns:
            dict: the Analytics Reporting API V4 response.
        """
        report_request = {
            'viewId': self.view_id,
            'pageToken': str(from_index),
            'pageSize': 1000,
            'filtersExpression': 'ga:previousPagePath!=(entrance)',
            'dateRanges': [{'startDate': self.start_date, 'endDate': 'today'}],
            'metrics': [{'expression': 'ga:pageviews'}],
            'dimensions': [
                {'name': 'ga:date'},
                {'name': 'ga:previousPagePath'},
                {'name': 'ga:pagePath'},
            ],
        }
        return self.analytics.reports().batchGet(
            body={'reportRequests': [report_request]}
        ).execute()

    def get(self):
        """Queries, filters and parses page paths obtained from Google Analytics.

        Batches are requested until the first report of a response no longer
        carries a 'nextPageToken'.

        Returns:
            list: unique page paths.
            list: previous and next page indices with counts.
        """
        paths = []
        connections = []
        page_token = 0

        while True:
            response = self.get_batch(page_token)
            paths, connections = self.filter_batch(response, paths, connections)

            reports = response.get('reports', [])
            if not reports or 'nextPageToken' not in reports[0]:
                break
            page_token = reports[0]['nextPageToken']

        return paths, connections

    def show(self, response):
        """Parses and prints the Analytics Reporting API V4 response.

        Args:
            response(dict): an Analytics Reporting API V4 response.
        """
        for report in response.get('reports', []):
            column_header = report.get('columnHeader', {})
            dimension_headers = column_header.get('dimensions', [])
            metric_headers = column_header.get(
                'metricHeader', {}).get('metricHeaderEntries', [])

            rows = report.get('data', {}).get('rows', [])
            for row_count, row in enumerate(rows):
                print(str(row_count) + '. ')

                for header, dimension in zip(dimension_headers, row.get('dimensions', [])):
                    print(header + ': ' + dimension)

                for values in row.get('metrics', []):
                    for metric_header, value in zip(metric_headers, values.get('values', [])):
                        print(metric_header.get('name') + '=' + value)

    def write(self, data, file_name):
        """Writes given data to file_name.csv

        The '.csv' extension is appended when file_name lacks it. Items that
        are lists or tuples become one CSV row each; any other item is written
        as a single-cell row.

        Args:
            data(iterable)
            file_name(string)
        """
        if not file_name.endswith('.csv'):
            file_name += '.csv'

        with open(file_name, 'w') as handle:
            writer = csv.writer(
                handle, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            for row in data:
                writer.writerow(row if isinstance(row, (list, tuple)) else [row])

    def read(self, file_name, is_flat=False):
        """Reads data from file_name.csv.

        The '.csv' extension is appended when file_name lacks it. Cells that
        look like numbers are converted, everything else stays a string.

        Args:
            file_name(string)
            is_flat(bool): should the reader expect flat, 1D data in file.
                When True only the first cell of every non-empty row is used.

        Returns:
            list
        """
        if not file_name.endswith('.csv'):
            file_name += '.csv'

        with open(file_name) as handle:
            reader = csv.reader(handle, delimiter=',', quotechar='"')
            if is_flat:
                # Blank lines yield empty rows; they hold no value to read.
                return [self._maybe_to_number(row[0]) for row in reader if row]
            return [[self._maybe_to_number(cell) for cell in row] for row in reader]

    def _maybe_to_number(self, x):
        """Converts a numeric-looking string to int or float.

        Args:
            x(string)

        Returns:
            int|float|string: int for plain digits, float for digits with a
                single '.', otherwise x unchanged.
        """
        # Ignore at most one '.' so that decimals such as '1.5' qualify too.
        if not x.replace('.', '', 1).isdigit():
            return x
        try:
            return float(x) if '.' in x else int(x)
        except ValueError:
            # isdigit() accepts characters (e.g. superscripts) int() rejects.
            return x