Commit ee5beb2
davidlenz committed: first commit
1 parent: 73cfa48
16 files changed, +62,086 / -0 lines

00_Introduction.ipynb            +616 lines (large diff not rendered by default)
01_data_prep.ipynb               +1,092 lines (large diff not rendered by default)
02_Topic_Modelling.ipynb         +1,090 lines (large diff not rendered by default)
03_Indicators_from_topics.ipynb  +855 lines (large diff not rendered by default)
data/BTC-USD.tsv                 +24,013 lines (large diff not rendered by default)
data/ETH-USD.tsv                 +24,013 lines (large diff not rendered by default)
data/sample_5000.csv             +5,006 lines (large diff not rendered by default)
data/sample_5000_clean.csv       +5,006 lines (large diff not rendered by default)
img/img2.png                     40.1 KB (image)
models/lda                       1.69 MB (binary file not shown)
models/tf                        7.06 MB (binary file not shown)
models/tfvec                     374 KB (binary file not shown)
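The three artifacts under models/ are committed as opaque binaries. Assuming they are joblib/pickle dumps of scikit-learn objects — a fitted LatentDirichletAllocation (models/lda), a document-term matrix (models/tf), and the fitted CountVectorizer (models/tfvec), which the LDA hyperparameter names in the results files below hint at — a minimal sketch for inspecting the learned topics could look like this; every name besides the file paths is an assumption:

import joblib  # assumption: the files were written with joblib.dump

lda = joblib.load('models/lda')      # assumed: fitted sklearn LatentDirichletAllocation
tf = joblib.load('models/tf')        # assumed: sparse document-term matrix
tfvec = joblib.load('models/tfvec')  # assumed: fitted CountVectorizer

words = tfvec.get_feature_names_out()  # get_feature_names() on older scikit-learn
for topic_idx, topic in enumerate(lda.components_):
    # print the ten highest-weighted words of each topic
    top = [words[i] for i in topic.argsort()[-10:][::-1]]
    print(topic_idx, ' '.join(top))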
@@ -0,0 +1,3 @@
mean_fit_time std_fit_time mean_score_time std_score_time param_doc_topic_prior param_learning_decay param_n_components param_topic_word_prior params split0_test_score split1_test_score split2_test_score split3_test_score split4_test_score mean_test_score std_test_score rank_test_score
0 2.0170026302337645 0.03741550737203571 0.07639555931091309 0.008662589538308011 0.839361047440404 0.8212949650754298 6 0.564190131926705 {'doc_topic_prior': 0.839361047440404, 'learning_decay': 0.8212949650754298, 'n_components': 6, 'topic_word_prior': 0.564190131926705} -256275.04843016082 -332980.2955978812 -339697.7831902633 -252611.54220866307 -294396.5861029371 -295192.25110598106 36706.88548415127 1
1 1.9184665203094482 0.026444200509169142 0.06741962432861329 0.0016203758946394715 0.9012077155645184 0.20318733949715184 12 0.27650064004349884 {'doc_topic_prior': 0.9012077155645184, 'learning_decay': 0.20318733949715184, 'n_components': 12, 'topic_word_prior': 0.27650064004349884} -274376.74411388 -350654.89200577175 -358775.20342676085 -271433.3904172668 -313273.76952689944 -313702.79989811574 36688.974854704364 2

results/random_search_results.csv (+6 lines)
@@ -0,0 +1,6 @@
mean_fit_time std_fit_time mean_score_time std_score_time param_doc_topic_prior param_learning_decay param_n_components param_topic_word_prior params split0_test_score split1_test_score split2_test_score mean_test_score std_test_score rank_test_score
3 1.7948634624481201 0.06372689183883608 0.15791106224060059 0.008782967158867946 0.686283549981997 0.7148637722981328 10 0.7397423008958642 {'doc_topic_prior': 0.686283549981997, 'learning_decay': 0.7148637722981328, 'n_components': 10, 'topic_word_prior': 0.7397423008958642} -415683.669236465 -558087.7684047661 -460645.28524348646 -478138.9076282392 59437.65351519789 1
2 1.75497039159139 0.09073817931020373 0.1765278180440267 0.010772197845102622 0.31636050352204015 0.19359241206374078 12 0.8348964292496762 {'doc_topic_prior': 0.31636050352204015, 'learning_decay': 0.19359241206374078, 'n_components': 12, 'topic_word_prior': 0.8348964292496762} -417204.68684397696 -559665.8833177035 -463124.043780226 -479998.20464730216 59370.87447510158 2
4 1.7589598496754963 0.06371169214306488 0.1589082876841227 0.010181513243585865 0.49340914215266685 0.4730117315460203 10 0.496006089068606 {'doc_topic_prior': 0.49340914215266685, 'learning_decay': 0.4730117315460203, 'n_components': 10, 'topic_word_prior': 0.496006089068606} -418163.50349439256 -562046.318347004 -463743.5747094519 -481317.79885028285 60040.0206423007 3
0 1.6991198857625325 0.022979306980971597 0.13763165473937988 0.010679466852180335 0.839361047440404 0.8212949650754298 13 0.564190131926705 {'doc_topic_prior': 0.839361047440404, 'learning_decay': 0.8212949650754298, 'n_components': 13, 'topic_word_prior': 0.564190131926705} -422673.9353439271 -565248.0404827606 -468133.50046436087 -485351.8254303495 59465.37872303278 4
1 1.5521802107493083 0.0526306807992322 0.10704660415649414 0.009437508407127758 0.9012077155645184 0.20318733949715184 19 0.27650064004349884 {'doc_topic_prior': 0.9012077155645184, 'learning_decay': 0.20318733949715184, 'n_components': 19, 'topic_word_prior': 0.27650064004349884} -436101.81640047766 -580612.080951003 -483637.0966353889 -500116.9979956232 60135.92659826567 5
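The column layout matches the cv_results_ table produced by scikit-learn's RandomizedSearchCV, and the sampled parameters (doc_topic_prior, learning_decay, n_components, topic_word_prior) suggest a search over LatentDirichletAllocation, with the negative scores looking like held-out log-likelihoods. A minimal, hedged sketch for reading the file back and pulling out the best configuration; the tab separator and the unnamed leading index column are assumptions about how the file was written:

import pandas as pd

# Assumed: tab-separated file with an unnamed leading index column.
results = pd.read_csv('results/random_search_results.csv', sep='\t', index_col=0)
best = results.sort_values('rank_test_score').iloc[0]
print(best['params'])
print(best['mean_test_score'], '+/-', best['std_test_score'])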

src/crycompare.py (+182 lines)
@@ -0,0 +1,182 @@
import requests
import warnings


class Price:
    """Thin wrapper around the CryptoCompare price endpoints."""

    def __init__(self):
        self.__coinlisturl = 'https://www.cryptocompare.com/api/data/coinlist/'
        self.__priceurl = 'https://min-api.cryptocompare.com/data/price?'
        self.__pricemultiurl = 'https://min-api.cryptocompare.com/data/pricemulti?'
        self.__pricemultifullurl = 'https://min-api.cryptocompare.com/data/pricemultifull?'
        self.__generateavgurl = 'https://min-api.cryptocompare.com/data/generateAvg?'
        self.__dayavgurl = 'https://min-api.cryptocompare.com/data/dayAvg?'
        self.__historicalurl = 'https://min-api.cryptocompare.com/data/pricehistorical?'
        self.__coinsnapshoturl = 'https://www.cryptocompare.com/api/data/coinsnapshot/?'
        self.__coinsnapshotfull = 'https://www.cryptocompare.com/api/data/coinsnapshotfullbyid/?'

    def coinList(self):
        return self.__get_url(self.__coinlisturl)

    def price(self, from_curr, to_curr, e=None, extraParams=None, sign=False, tryConversion=True):
        return self.__get_price(self.__priceurl, from_curr, to_curr, e, extraParams, sign, tryConversion)

    def priceMulti(self, from_curr, to_curr, e=None, extraParams=None, sign=False, tryConversion=True):
        return self.__get_price(self.__pricemultiurl, from_curr, to_curr, e, extraParams, sign, tryConversion)

    def priceMultiFull(self, from_curr, to_curr, e=None, extraParams=None, sign=False, tryConversion=True):
        return self.__get_price(self.__pricemultifullurl, from_curr, to_curr, e, extraParams, sign, tryConversion)

    def priceHistorical(self, from_curr, to_curr, markets, ts=None, e=None, extraParams=None,
                        sign=False, tryConversion=True):
        # markets and ts are passed by keyword so they land in the right parameters of __get_price.
        return self.__get_price(self.__historicalurl, from_curr, to_curr, e, extraParams, sign,
                                tryConversion, markets=markets, ts=ts)

    def generateAvg(self, from_curr, to_curr, markets, extraParams=None, sign=False, tryConversion=True):
        # Keyword arguments keep the call aligned with the __get_avg signature.
        return self.__get_avg(self.__generateavgurl, from_curr, to_curr, markets=markets,
                              extraParams=extraParams, sign=sign, tryConversion=tryConversion)

    def dayAvg(self, from_curr, to_curr, e=None, extraParams=None, sign=False, tryConversion=True,
               avgType=None, UTCHourDiff=0, toTs=None):
        return self.__get_avg(self.__dayavgurl, from_curr, to_curr, e=e, extraParams=extraParams,
                              sign=sign, tryConversion=tryConversion, avgType=avgType,
                              UTCHourDiff=UTCHourDiff, toTs=toTs)

    def coinSnapshot(self, from_curr, to_curr):
        return self.__get_url(self.__coinsnapshoturl + 'fsym=' + from_curr.upper() + '&tsym=' + to_curr.upper())

    def coinSnapshotFullById(self, coin_id):
        return self.__get_url(self.__coinsnapshotfull + 'id=' + str(coin_id))

    def __get_price(self, baseurl, from_curr, to_curr, e=None, extraParams=None, sign=False,
                    tryConversion=True, markets=None, ts=None):
        # Build the query string from whichever arguments were supplied.
        args = list()
        if isinstance(from_curr, str):
            args.append('fsym=' + from_curr.upper())
        elif isinstance(from_curr, list):
            args.append('fsyms=' + ','.join(from_curr).upper())
        if isinstance(to_curr, list):
            args.append('tsyms=' + ','.join(to_curr).upper())
        elif isinstance(to_curr, str):
            args.append('tsyms=' + to_curr.upper())
        if isinstance(markets, str):
            args.append('markets=' + markets)
        elif isinstance(markets, list):
            args.append('markets=' + ','.join(markets))
        if e:
            args.append('e=' + e)
        if extraParams:
            args.append('extraParams=' + extraParams)
        if sign:
            args.append('sign=true')
        if ts:
            args.append('ts=' + str(ts))
        if not tryConversion:
            args.append('tryConversion=false')
        if len(args) >= 2:
            return self.__get_url(baseurl + '&'.join(args))
        else:
            raise ValueError('Must have both fsym and tsym arguments.')

    def __get_avg(self, baseurl, from_curr, to_curr, markets=None, e=None, extraParams=None,
                  sign=False, tryConversion=True, avgType=None, UTCHourDiff=0, toTs=None):
        args = list()
        if isinstance(from_curr, str):
            args.append('fsym=' + from_curr.upper())
        if isinstance(to_curr, str):
            args.append('tsym=' + to_curr.upper())
        if isinstance(markets, str):
            args.append('markets=' + markets)
        elif isinstance(markets, list):
            args.append('markets=' + ','.join(markets))
        if e:
            args.append('e=' + e)
        if extraParams:
            args.append('extraParams=' + extraParams)
        if sign:
            args.append('sign=true')
        if avgType:
            args.append('avgType=' + avgType)
        if UTCHourDiff:
            args.append('UTCHourDiff=' + str(UTCHourDiff))
        if toTs:
            args.append('toTs=' + str(toTs))
        if not tryConversion:
            args.append('tryConversion=false')
        if len(args) >= 2:
            return self.__get_url(baseurl + '&'.join(args))
        else:
            raise ValueError('Must have both fsym and tsym arguments.')

    def __get_url(self, url):
        raw_data = requests.get(url)
        raw_data.encoding = 'utf-8'
        if raw_data.status_code != 200:
            raw_data.raise_for_status()
            return False
        try:
            # Python 2 compatibility check; NameError is expected on Python 3.
            if isinstance(raw_data.text, unicode):
                warnings.warn('Object returned is of type unicode. Cannot parse to str in Python 2.')
        except NameError:
            pass
        return raw_data.json()


class History:
    """Thin wrapper around the CryptoCompare OHLCV history endpoints."""

    def __init__(self):
        self.__histominuteurl = 'https://min-api.cryptocompare.com/data/histominute?'
        self.__histohoururl = 'https://min-api.cryptocompare.com/data/histohour?'
        self.__histodayurl = 'https://min-api.cryptocompare.com/data/histoday?'

    def histoMinute(self, from_curr, to_curr, e=None, extraParams=None,
                    sign=False, tryConversion=True, aggregate=None, limit=None, toTs=None):
        return self.__get_price(self.__histominuteurl, from_curr, to_curr, e, extraParams, sign,
                                tryConversion, aggregate, limit, toTs)

    def histoHour(self, from_curr, to_curr, e=None, extraParams=None,
                  sign=False, tryConversion=True, aggregate=None, limit=None, toTs=None):
        return self.__get_price(self.__histohoururl, from_curr, to_curr, e, extraParams, sign,
                                tryConversion, aggregate, limit, toTs)

    def histoDay(self, from_curr, to_curr, e=None, extraParams=None, sign=False,
                 tryConversion=True, aggregate=None, limit=None, toTs=None, allData=False):
        return self.__get_price(self.__histodayurl, from_curr, to_curr, e, extraParams, sign,
                                tryConversion, aggregate, limit, toTs, allData)

    def __get_price(self, baseurl, from_curr, to_curr, e=None, extraParams=None, sign=False,
                    tryConversion=True, aggregate=None, limit=None, toTs=None, allData=False):
        # Build the query string from whichever arguments were supplied.
        args = list()
        if isinstance(from_curr, str):
            args.append('fsym=' + from_curr.upper())
        if isinstance(to_curr, str):
            args.append('tsym=' + to_curr.upper())
        if e:
            args.append('e=' + e)
        if extraParams:
            args.append('extraParams=' + extraParams)
        if sign:
            args.append('sign=true')
        if aggregate:
            args.append('aggregate=' + str(aggregate))
        if limit:
            args.append('limit=' + str(limit))
        if toTs:
            args.append('toTs=' + str(toTs))
        if allData:
            args.append('allData=true')
        if not tryConversion:
            args.append('tryConversion=false')
        if len(args) >= 2:
            return self.__get_url(baseurl + '&'.join(args))
        else:
            raise ValueError('Must have both fsym and tsym arguments.')

    def __get_url(self, url):
        raw_data = requests.get(url)
        raw_data.encoding = 'utf-8'
        if raw_data.status_code != 200:
            raw_data.raise_for_status()
            return False
        try:
            # Python 2 compatibility check; NameError is expected on Python 3.
            if isinstance(raw_data.text, unicode):
                warnings.warn('Object returned is of type unicode. Cannot parse to str in Python 2.')
        except NameError:
            pass
        return raw_data.json()
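A short usage sketch for the wrapper above, mirroring how the notebook below calls it; the symbols and limit are illustrative, and a live CryptoCompare endpoint is assumed:

from crycompare import History, Price

h = History()
# 2000 hourly BTC/USD candles ending now; the OHLCV records sit under the 'Data' key
hourly = h.histoHour('BTC', 'USD', limit=2000)
print(len(hourly['Data']))

p = Price()
# current ETH price quoted in USD and EUR
print(p.price('ETH', ['USD', 'EUR']))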

src/get_data.ipynb (+204 lines)
@@ -0,0 +1,204 @@
Jupyter notebook (nbformat 4, Python 3 kernel, language version 3.7.6) containing one executed code cell and one empty trailing cell.

Cell source:

import time
import crycompare
import pandas as pd
import os

def download_hist_price_data(base_cur, dest_cur, granularity='hour', GET_DATA_UNTIL_DATE='today', verbose=False):
    """
    Load data from CryptoCompare for base_cur to dest_cur with the given granularity
    (either 'hour' or 'minute'). Each request fetches 2000 datapoints.
    """
    hist = crycompare.History()

    dfs = []

    # fetch the most recent chunk
    if granularity == 'hour':
        df = pd.DataFrame(hist.histoHour(base_cur, dest_cur, limit=2000)['Data'])
    elif granularity == 'minute':
        df = pd.DataFrame(hist.histoMinute(base_cur, dest_cur, limit=2000)['Data'])
    else:
        raise Exception('Granularity should be either "hour" or "minute", not "{}"'.format(granularity))

    # transform timestamps
    df['date'] = df.time.apply(pd.to_datetime, unit='s')
    last_time = df.iloc[0].time
    dfs.append(df)

    # page backwards in 2000-row chunks until the target date is reached
    while df.date.min() > pd.to_datetime(GET_DATA_UNTIL_DATE):
        if verbose:
            print(df.date.min())
            print(df.date.max())
            print()

        if granularity == 'hour':
            df = pd.DataFrame(hist.histoHour(base_cur, dest_cur, limit=2000, toTs=last_time)['Data'])
        elif granularity == 'minute':
            df = pd.DataFrame(hist.histoMinute(base_cur, dest_cur, limit=2000, toTs=last_time)['Data'])

        df['date'] = df.time.apply(pd.to_datetime, unit='s')
        dfs.append(df)
        last_time = df.iloc[0].time

    out = pd.concat(dfs).reset_index(drop=True)
    out.index = out.date
    out = out.sort_index()
    return out

def load_and_save_coin_prices(coinsymbols, PRICE_DATA_PATH, GET_DATA_UNTIL_DATE):
    """
    :param coinsymbols: iterable of coin symbols, e.g. ['BAT', 'BTC']
    :param PRICE_DATA_PATH: save directory
    :param GET_DATA_UNTIL_DATE: how far back (from now) the data should be retrieved
    :return: Nothing. Saves data into PRICE_DATA_PATH.
    """
    for coin in coinsymbols:
        try:
            print(coin)
            base_cur = coin
            dest_cur = 'USD'

            # download data and rename columns
            df = download_hist_price_data(base_cur, dest_cur, granularity='hour',
                                          GET_DATA_UNTIL_DATE=GET_DATA_UNTIL_DATE,
                                          verbose=True)
            df = df[['time', 'low', 'high', 'open', 'close', 'volumefrom']]
            df.columns = ['time', 'low', 'high', 'open', 'close', 'volume']

            # save data
            save_path_coin = os.path.abspath(os.path.join(PRICE_DATA_PATH, f'{base_cur}-{dest_cur}'))
            df.to_csv(f'{save_path_coin}.tsv', header=True, index=False, sep='\t')

            # be gentle to the API
            time.sleep(2)
        except Exception as e:
            print(coin, 'did not work because of', e)


coinsymbols = ["ETH", "BTC"]
PRICE_DATA_PATH = '../data/'
GET_DATA_UNTIL_DATE = "2017-08-01"
%time load_and_save_coin_prices(coinsymbols, PRICE_DATA_PATH, GET_DATA_UNTIL_DATE)

Cell output (stdout), showing each downloaded 2000-hour chunk's earliest and latest timestamp, newest chunk first:

ETH
2019-12-15 08:00:00 to 2020-03-07 16:00:00
2019-09-23 00:00:00 to 2019-12-15 08:00:00
2019-07-01 16:00:00 to 2019-09-23 00:00:00
2019-04-09 08:00:00 to 2019-07-01 16:00:00
2019-01-16 00:00:00 to 2019-04-09 08:00:00
2018-10-24 16:00:00 to 2019-01-16 00:00:00
2018-08-02 08:00:00 to 2018-10-24 16:00:00
2018-05-11 00:00:00 to 2018-08-02 08:00:00
2018-02-16 16:00:00 to 2018-05-11 00:00:00
2017-11-25 08:00:00 to 2018-02-16 16:00:00
2017-09-03 00:00:00 to 2017-11-25 08:00:00
BTC
(identical chunk boundaries to ETH, from 2017-09-03 00:00:00 to 2020-03-07 16:00:00)
Wall time: 21.2 s
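The cell above writes data/ETH-USD.tsv and data/BTC-USD.tsv (the +24,013-line files listed at the top of the commit). A minimal sketch for reading one of them back, assuming only the tab separator and the column names written by the cell:

import pandas as pd

btc = pd.read_csv('data/BTC-USD.tsv', sep='\t')
btc['date'] = pd.to_datetime(btc['time'], unit='s')  # 'time' is a unix timestamp in seconds
btc = btc.set_index('date').sort_index()
print(btc[['open', 'high', 'low', 'close', 'volume']].tail())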
