-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSQL_ETL_pipe_line_3.py
62 lines (44 loc) · 1.69 KB
/
SQL_ETL_pipe_line_3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
from sqlalchemy import create_engine
import db_credentials as db
import sql_etl_table_1 as et
# creating sql engine
engine = create_engine(f'postgresql://{db.user}:{db.password}@{db.host}:{db.port}/{db.database}')
engine.connect()
# financial_performance
# 1) What is the average fare amount per trip?
avg_fare_amount_query = '''
SELECT AVG(fare_amount) AS avg_fare_amount
FROM nyc_taxi;
'''
avg_fare_amount = pd.read_sql(avg_fare_amount_query, engine)
#print(avg_fare_amount)
# 2) How much revenue was generated from tolls and surcharges combined?
total_tolls_and_surcharges_revenue_query = '''
SELECT SUM( tolls_amount + improvement_surcharge) AS total_tolls_and_surcharges_revenue
FROM nyc_taxi;
'''
total_tolls_and_surcharges_revenue = pd.read_sql(total_tolls_and_surcharges_revenue_query, engine)
#print(total_tolls_and_surcharges_revenue)
# 3) What is the average total amount paid by passengers?
avg_total_amount_query = '''
SELECT AVG(total_amount) AS avg_total_amount
FROM nyc_taxi;
'''
avg_total_amount = pd.read_sql(avg_total_amount_query, engine)
#print(avg_total_amount)
# creating report table for financial performance
report = '''
SELECT CURRENT_DATE AS ingestion_date, * FROM
(SELECT AVG(fare_amount) AS avg_fare_amount
FROM nyc_taxi) AS a,
(SELECT SUM( tolls_amount + improvement_surcharge) AS total_tolls_and_surcharges_revenue
FROM nyc_taxi) AS b,
(SELECT AVG(total_amount) AS avg_total_amount
FROM nyc_taxi) AS c
'''
# Using the function from the sql_etl_table_1 module
df_report = et.transform_data(report, engine)
#print(df_report)
et.load_report_to_warehouse(df_report, 'financial_performance', engine)
engine.dispose()