-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathcohort_rentention_analysis.sql
185 lines (156 loc) · 3.49 KB
/
cohort_rentention_analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
---Cleaning Data
---Total Records = 541909
---135080 Records have no customerID
---406829 Records have customerID
;with online_retail as
(
SELECT [InvoiceNo]
,[StockCode]
,[Description]
,[Quantity]
,[InvoiceDate]
,[UnitPrice]
,[CustomerID]
,[Country]
FROM [PortfolioDB].[dbo].[online_retail]
Where CustomerID != 0
)
, quantity_unit_price as
(
---397882 records with quantity and Unit price
select *
from online_retail
where Quantity > 0 and UnitPrice > 0
)
, dup_check as
(
---duplicate check
select * , ROW_NUMBER() over (partition by InvoiceNo, StockCode, Quantity order by InvoiceDate)dup_flag
from quantity_unit_price
)
---397667 clean data
--5215 duplicate records
select *
into #online_retail_main
from dup_check
where dup_flag = 1
----Clean Data
----BEGIN COHORT ANALYSIS
select * from #online_retail_main
--Unique Identifier (CustomerID)
--Initial Start Date (First Invoice Date)
--Revenue Data
select
CustomerID,
min(InvoiceDate) first_purchase_date,
DATEFROMPARTS(year(min(InvoiceDate)), month(min(InvoiceDate)), 1) Cohort_Date
into #cohort
from #online_retail_main
group by CustomerID
select *
from #cohort
---Create Cohort Index
select
mmm.*,
cohort_index = year_diff * 12 + month_diff + 1
into #cohort_retention
from
(
select
mm.*,
year_diff = invoice_year - cohort_year,
month_diff = invoice_month - cohort_month
from
(
select
m.*,
c.Cohort_Date,
year(m.InvoiceDate) invoice_year,
month(m.InvoiceDate) invoice_month,
year(c.Cohort_Date) cohort_year,
month(c.Cohort_Date) cohort_month
from #online_retail_main m
left join #cohort c
on m.CustomerID = c.CustomerID
)mm
)mmm
--where CustomerID = 14733
---Pivot Data to see the cohort table
select *
into #cohort_pivot
from(
select distinct
CustomerID,
Cohort_Date,
cohort_index
from #cohort_retention
)tbl
pivot(
Count(CustomerID)
for Cohort_Index In
(
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9],
[10],
[11],
[12],
[13])
)as pivot_table
select *
from #cohort_pivot
order by Cohort_Date
select Cohort_Date ,
(1.0 * [1]/[1] * 100) as [1],
1.0 * [2]/[1] * 100 as [2],
1.0 * [3]/[1] * 100 as [3],
1.0 * [4]/[1] * 100 as [4],
1.0 * [5]/[1] * 100 as [5],
1.0 * [6]/[1] * 100 as [6],
1.0 * [7]/[1] * 100 as [7],
1.0 * [8]/[1] * 100 as [8],
1.0 * [9]/[1] * 100 as [9],
1.0 * [10]/[1] * 100 as [10],
1.0 * [11]/[1] * 100 as [11],
1.0 * [12]/[1] * 100 as [12],
1.0 * [13]/[1] * 100 as [13]
from #cohort_pivot
order by Cohort_Date
---DYNAMIC SQL TO CREATE PIVOT TABLE
DECLARE
@columns NVARCHAR(MAX) = '',
@sql NVARCHAR(MAX) = '';
SELECT
@columns += QUOTENAME(cohort_index) + ','
FROM
(select distinct cohort_index from #cohort_retention) m
ORDER BY
cohort_index;
SET @columns = LEFT(@columns, LEN(@columns) - 1);
PRINT @columns;
-- construct dynamic SQL
SET @sql ='
---# Return number of unique elements in the object
SELECT *
FROM
(
select distinct
Cohort_Date,
cohort_index,
CustomerID
from #cohort_retention
) t
PIVOT(
COUNT(CustomerID)
FOR cohort_index IN ('+ @columns +')
) AS pivot_table
order by Cohort_Date
';
-- execute the dynamic SQL
EXECUTE sp_executesql @sql;