-
Notifications
You must be signed in to change notification settings - Fork 0
/
cic_analytics.jl
206 lines (159 loc) · 5.6 KB
/
cic_analytics.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
##
#load packages
using Plots
using StatsPlots
using Indicators
using MarketData
using IterableTables
using DataFrames
using StatsBase
using Statistics
using TimeSeries
using Impute: Impute
using FreqTables
using HypothesisTests
using StatsModels
using Lathe
using GLM
##
##--------
#load dataset and process
"""
    process_assets(symbol, period; lag=Day(4))

Download price history for `symbol` with `yahoo` and append a percentage-return column.

# Arguments
- `symbol`: ticker passed straight to `yahoo` (e.g. `"ETH-USD"`).
- `period`: how far before the current time the download window starts (e.g. `Month(2)`).

# Keywords
- `lag`: how far before the current time the download window ends. Defaults to
  `Day(4)`, the value that used to be hard-coded.

# Returns
A tuple `(merged_data, merged_df)`: the downloaded data merged with a `Return` column
(`percentchange` of `Close`, multiplied by 100), and the same data as a `DataFrame`.
"""
function process_assets(symbol, period; lag=Day(4))
    # Read the clock once so both ends of the window share the same reference instant.
    current = now()
    data = yahoo(symbol, YahooOpt(period1=current - period, period2=current - lag))
    returns = percentchange(data.Close)
    merged_data = merge(data, returns .* 100)
    # The merged return column arrives as `Close_1`; give it a meaningful name.
    merged_data = TimeSeries.rename(merged_data, :Close_1 => :Return)
    merged_df = DataFrame(merged_data)
    return merged_data, merged_df
end
# Two months of history per asset; each call yields the time series and its DataFrame.
eth, eth_df = process_assets("ETH-USD", Month(2))
lnk, lnk_df = process_assets("LINK-USD", Month(2))
snp, snp_df = process_assets("^GSPC", Month(2))
#eth = yahoo(:"ETH-USD", YahooOpt(period1 = now() - Month(1)))
##---
##-- candlestick and MAs function
"""
    crypto_MAs(ts, df, n1=7, n2=2)

Draw a candlestick chart of `ts` and overlay two simple moving averages of `df.Close`.

# Arguments
- `ts`: series handed to `plot` with `seriestype=:candlestick`.
- `df`: table with `timestamp` and `Close` columns; it is sorted by `timestamp` before
  the averages are computed.
- `n1`: window of the first moving average (black line).
- `n2`: window of the second moving average (blue line).

# Returns
The value of the final `plot!` call.
"""
function crypto_MAs(ts, df, n1=7, n2=2)
    plot(ts; seriestype=:candlestick, title="Candlestick")
    # Sort once and reuse the closing prices for both windows.
    closes = sort(df, :timestamp).Close
    long_ma = sma(closes; n=n1)
    short_ma = sma(closes; n=n2)
    # Label both overlays with their window so the legend tells them apart.
    plot!(long_ma; linewidth=2, color=:black, label="SMA($n1)")
    return plot!(short_ma; linewidth=2, color=:blue, label="SMA($n2)")
end
##--
# ETH exploratory analysis: summary table and spread of the closing price.
println(describe(eth_df))
sigma = std(eth_df.Close)
print("sigma: ", sigma)
# Candlestick chart with moving averages.
crypto_MAs(eth, eth_df)
# Histogram of the return column.
histogram(eth_df.Return; bins=25, label="Returns")
# It's interesting that there's a gap between 1400-1500 price.
##----
##--
# LINK: candlestick chart, side-by-side histograms, then summary statistics.
crypto_MAs(lnk, lnk_df)
p1 = histogram(lnk.Close; bins=25, label="Close")
p2 = histogram(lnk.Return; bins=25, label="Return")
plot(p1, p2; layout=(1, 2))
println(describe(lnk_df))
print("sigma ", std(lnk_df.Close))
##--
##--
# S&P 500: candlestick chart, side-by-side histograms, then summary statistics.
crypto_MAs(snp, snp_df)
p1 = histogram(snp.Close; bins=15, label="Close")
p2 = histogram(snp.Return; bins=15, label="Return")
plot(p1, p2; layout=(1, 2), title="SNP Close & Return")
println(describe(snp_df))
print("sigma ", std(snp_df.Close))
##--
##--
# Align ETH with the S&P 500 by date. ETH rows with no S&P row on the same timestamp
# get `missing` in the S&P columns (`Close_1`, `Return_1`, ...).
eth_snp_df = leftjoin(eth_df, snp_df; on=:timestamp, makeunique=true)
# `leftjoin` does not promise chronological row order, and next-observation-carried-
# backward imputation fills each gap from the row that follows it, so restore the
# date order before imputing.
sort!(eth_snp_df, :timestamp)
closing_df = eth_snp_df[:, ["Close", "Close_1"]]
eth_snp_df = Impute.nocb(eth_snp_df)
closing_df = Impute.nocb(closing_df)
# Same alignment for ETH vs LINK, kept in date order for consistency.
eth_lnk_df = leftjoin(eth_df, lnk_df; on=:timestamp, makeunique=true)
sort!(eth_lnk_df, :timestamp)
eth_lnk_df_close = eth_lnk_df[:, ["Close", "Close_1"]]
##--
##--
# ETH vs S&P 500
gr()
scatter(closing_df.Close, closing_df.Close_1; title="S&P Close and ETH Close")
# Author's observation: no correlation between S&P and ETH returns.
@df eth_snp_df cor(:Return, :Return_1)
# Author's observation: moderate correlation in closing prices.
@df eth_snp_df cor(:Close, :Close_1)
# ETH vs LNK
scatter(
    eth_lnk_df_close.Close,
    eth_lnk_df_close.Close_1;
    title="ETH Close and LNK Close",
    label="Close",
)
# Correlation between ETH and LNK returns.
@df eth_lnk_df cor(:Return, :Return_1)
# Author's observation: good correlation in closing prices.
@df eth_lnk_df cor(:Close, :Close_1)
##--
# Split data first.
# Take both closing-price columns from the timestamp-joined frame so every row pairs
# an ETH and a LNK price from the same date. Pairing `eth_df.Close` with
# `lnk_df.Close` by row position misaligns (or throws) as soon as the two downloads
# differ in length or dates. Rows without a LNK price are dropped.
combined_data = dropmissing(DataFrame(; eth=eth_lnk_df.Close, lnk=eth_lnk_df.Close_1))
train, test = Lathe.preprocess.TrainTestSplit(combined_data, 0.70)
#### Next step is to normalize returns and check scatter plot/corr
"""
    normalize_column(arr)

Fit a `UnitRangeTransform` to `arr` (with `dims=1, unit=true`) and return `arr` with
that transform applied.
"""
function normalize_column(arr)
    scaler = StatsBase.fit(UnitRangeTransform, arr; dims=1, unit=true)
    return StatsBase.transform(scaler, arr)
end
# Scale both training columns, then gather them into the frame used for the regression.
eth_norm_close, lnk_norm_close = normalize_column.((train.eth, train.lnk))
eth_lnk_norm_close = DataFrame(; eth=eth_norm_close, lnk=lnk_norm_close)
#plot(eth_norm_returns,snp_norm_returns, seriestype = :scatter, title = "S&P Return and ETH Return")
# Author's observation: no correlation at all with returns.
##--
# Fit a linear model of scaled ETH close on scaled LNK close and print the summary.
fm = @formula(eth ~ lnk)
linearRegressor = lm(fm, eth_lnk_norm_close)
print(linearRegressor)
gr()
scatter(
    eth_lnk_norm_close.lnk,
    eth_lnk_norm_close.eth;
    title="ETH Close and LNK Close",
    label="Close",
    legend=:bottomright,
)
# Overlay the fitted line (intercept + slope * x) over x in [0, 1].
plot!(x -> coef(linearRegressor)[1] + coef(linearRegressor)[2] * x, 0, 1; label="fit_exact")
##--
# Evaluate the fitted model on the held-out rows.
# Scale the test columns with the ranges fitted on the TRAINING columns. Fitting a
# separate transform on the test set would put it on a different scale from the one
# the model was trained on, which distorts the predictions and the error metric.
eth_scaler = StatsBase.fit(UnitRangeTransform, train.eth; dims=1, unit=true)
lnk_scaler = StatsBase.fit(UnitRangeTransform, train.lnk; dims=1, unit=true)
eth_norm_close_test = StatsBase.transform(eth_scaler, test.eth)
lnk_norm_close_test = StatsBase.transform(lnk_scaler, test.lnk)
# NOTE: `test` is overwritten with the scaled frame, so re-run the split before
# re-running this cell.
test = DataFrame(; eth=eth_norm_close_test, lnk=lnk_norm_close_test)
ypredicted_test = predict(linearRegressor, test)
# Predicted vs actual.
pred_df = DataFrame(; eth_pred=ypredicted_test, eth_actual=test.eth)
pred_df.err = pred_df.eth_actual - pred_df.eth_pred
rmse = sqrt(mean(abs2, pred_df.err))
print("RMSE:", rmse)
##--
#Bayesian Inference section
#Build function to calculate BI probability
#function-----------------------
"""
    bayes_prob(df1, df2, increase=true, complement=false)

Estimate with Bayes' rule the probability that `df1`'s return is positive, given the
direction of `df2`'s return on the same timestamp.

Both frames need `timestamp` and `Return` columns. Event `A` is `df1.Return > 0`.
Event `B` is `df2.Return > 0` when `increase == true`, and the opposite (`df2.Return`
not positive) otherwise. The result is `P(A | B) = P(B | A) * P(A) / P(B)`, with every
probability taken from the joint frequency table of the two indicators.

# Arguments
- `df1`, `df2`: frames joined on `timestamp` (left join on `df1`).
- `increase`: selects which direction of `df2`'s return is conditioned on.
- `complement`: when not `false`, return `1 - P(A | B)` instead.
"""
function bayes_prob(df1, df2, increase=true, complement=false)
    df = leftjoin(df1, df2; on=:timestamp, makeunique=true)
    # `leftjoin` does not promise chronological row order, and next-observation-
    # carried-backward imputation depends on it, so restore the date order first.
    sort!(df, :timestamp)
    if any(col -> any(ismissing, col), eachcol(df))
        df = Impute.nocb(df) # in case missing values exist
    end
    return_df = DataFrame(; A=df.Return .> 0, B=df.Return_1 .> 0)
    # Rows index B (direction of df2), columns index A (direction of df1).
    my_table = freqtable(return_df.B, return_df.A)
    total = sum(my_table)
    a_true = sum(my_table[:, Name(true)])
    # Only the B row differs between the "increase" and "decrease" cases.
    b_row = Name(increase == true)
    prob_a = a_true / total
    prob_b = sum(my_table[b_row, :]) / total
    prob_ba = my_table[b_row, Name(true)] / a_true
    prob_ab = prob_ba * prob_a / prob_b
    return complement == false ? prob_ab : 1 - prob_ab
end
bayes_prob(eth_df, lnk_df)
#end function----------------------------
##--
#put in weave
#summarize findings and contents
##--