-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate-analytics-monthly.py
128 lines (108 loc) · 4.41 KB
/
generate-analytics-monthly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import pandas as pd
import os
from googleapiclient.discovery import build
from google.oauth2 import service_account
import logging
SCOPES = ['https://www.googleapis.com/auth/analytics.readonly']
BASE_DIR = '/home/pdaniel/web-analytics/'
def initialize_analyticsreporting():
"""Establish a connection with the google reporting API V4"""
credentials = service_account.Credentials.from_service_account_file(KEY_FILE_LOCATION)
analytics = build('analyticsreporting', 'v4', credentials=credentials, cache_discovery=False)
return analytics
def print_response(response):
list = []
# get report data
for report in response.get('reports', []):
# set column headers
columnHeader = report.get('columnHeader', {})
dimensionHeaders = columnHeader.get('dimensions', [])
metricHeaders = columnHeader.get('metricHeader', {}).get('metricHeaderEntries', [])
rows = report.get('data', {}).get('rows', [])
for row in rows:
# create dict for each row
dict = {}
dimensions = row.get('dimensions', [])
dateRangeValues = row.get('metrics', [])
# fill dict with dimension header (key) and dimension value (value)
for header, dimension in zip(dimensionHeaders, dimensions):
dict[header] = dimension
# fill dict with metric header (key) and metric value (value)
for i, values in enumerate(dateRangeValues):
for metric, value in zip(metricHeaders, values.get('values')):
#set int as int, float a float
if ',' in value or '.' in value:
dict[metric.get('name')] = float(value)
else:
dict[metric.get('name')] = int(value)
list.append(dict)
df = pd.DataFrame(list)
return df
def get_report(analytics,metric):
return analytics.reports().batchGet(
body={
'reportRequests': [
{
'viewId': VIEW_ID,
'dimensions': [{'name': 'ga:date'}],
'metrics': [{'expression': metric}],
'dateRanges': [{'startDate': '365daysAgo', 'endDate': 'today'}]
}]
}
).execute()
def write_dataframe(df):
out_df = df[['date','sessions','users']].copy(deep=True)
out_df['date'] = pd.to_datetime(out_df['date'])
out_df.index = out_df['date']
monthly_df = out_df.resample('1M').sum()
monthly_df['sessions'] = monthly_df['sessions'].astype(str)
monthly_df['users'] = monthly_df['users'].astype(str)
monthly_df = monthly_df.reset_index()
monthly_df.to_csv(os.path.join(BASE_DIR,'data',FILE_NAME),index=False)
def build_dataframe():
metrics = [
"ga:users",
"ga:newUsers",
"ga:percentNewSessions",
"ga:sessions",
"ga:bounces",
"ga:sessionDuration",
"ga:entrances",
"ga:pageviews",
"ga:pageviewsPerSession"
]
for i, metric in enumerate(metrics):
""" Build a dataframe for each varaible """
response = get_report(initialize_analyticsreporting(),metric) # read data from a JSON format
if i == 0:
df = print_response(response)
else:
df[metric] = print_response(response)[metric]
for col in df.columns:
new_name = col.split(":")[-1]
df.rename(columns={col:new_name},inplace=True)
write_dataframe(df)
def copy_file_to_webserver():
"""Copy images from model runs to webserver where they can be viewed publically."""
try:
os.system('scp -i /etc/ssh/keys/pdaniel/scp_rsa {} skyrocket8.mbari.org:/var/www/html/data/web-analytics/ '.format(os.path.join(BASE_DIR,'data',FILE_NAME)))
except:
logging.debug('Unabled to Copy Analytics File to Skyrocket')
if __name__ == "__main__":
KEY_FILE_LOCATION = os.path.join(BASE_DIR,'keys/cencoos-web-analytics-5303bfd7dcbb.json')
VIEW_ID = '10796414'
FILE_NAME = 'analytics-data-monthly.csv'
build_dataframe()
copy_file_to_webserver()
# Run again for Data Services
KEY_FILE_LOCATION = os.path.join(BASE_DIR,'keys/resounding-axe-293817-00844a9aafb0.json')
VIEW_ID = "180542384"
FILE_NAME = 'services-analytics-data-monthly.csv'
build_dataframe()
copy_file_to_webserver()
# Run again for Data Services
KEY_FILE_LOCATION = os.path.join(BASE_DIR,'keys/resounding-axe-293817-00844a9aafb0.json')
VIEW_ID = "81137966"
FILE_NAME = 'portal-analytics-data-monthly.csv'
build_dataframe()
copy_file_to_webserver()