From cd385df78972140a4aa78fc485935e2d5aeb958b Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 5 Mar 2021 18:06:40 +0100 Subject: [PATCH 01/53] added working price preloading using ccxt --- requirements.txt | 1 + src/price_data.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++ src/taxman.py | 3 ++ 3 files changed, 113 insertions(+) diff --git a/requirements.txt b/requirements.txt index b70bcb89..542c7cb2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +ccxt==1.42.47 certifi==2020.12.5 chardet==4.0.0 idna==2.10 diff --git a/src/price_data.py b/src/price_data.py index 51eb438c..c471b070 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -23,8 +23,10 @@ import time from pathlib import Path from typing import Any, Optional, Union +from time import sleep import requests +import ccxt import config import misc @@ -414,3 +416,110 @@ def get_cost( if isinstance(tr, transaction.SoldCoin): return price * tr.sold raise NotImplementedError + + def get_candles(self, start, stop, symbol): + if self.exchange.has['fetchOHLCV']: + sleep(self.exchange.rateLimit / 1000) # time.sleep wants seconds + print(f"get {max(int((stop-start)/1000/60)+2,1)} rows") + # get 2min before and after range + return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1)) + else: + raise Exception + + def initialize_ccxt(self): + exchange_id = 'binance' + exchange_class = getattr(ccxt, exchange_id) + self.exchange = exchange_class() + self.markets = [] + markets = self.exchange.fetch_markets() + for market in markets: + # may not apply for all exchanges, currently works for binance + self.markets.append(market["symbol"].split("/")) + + def _get_binance_bulk_pair_list(self, reference_coin, coin): + def get_pair(coin, reference_coin): + for market in self.markets: + if market[0] == coin and market[1] == reference_coin: + return [coin, reference_coin, False] + elif market[1] == coin and 
market[0] == reference_coin: + return [reference_coin, coin, True] + + pair = get_pair(coin, reference_coin) + if not pair: + for market in self.markets: + if market[0] == coin: + pair = get_pair(market[1], reference_coin) + if pair: + return [[market[0], market[1], False], pair] + elif market[1] == coin: + pair = get_pair(market[1], reference_coin) + if pair: + return [[market[1], market[0], True], pair] + return [pair, ] + + def _get_binance_bulk_pair_data(self, operations, symbol, invert=False): + timestamps = [] + timestamppairs = [] + counter = 0 + data = [] + for op in operations: + timestamps.append(op.utc_time) + while len(timestamps) > 0: + timestamp = timestamps.pop(0) + if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp: + timestamppairs[-1].append(timestamp) + else: + timestamppairs.append([timestamp, ]) + for batch in timestamppairs: + last = int(max(batch).timestamp() * 1000) + first = int(min(batch).timestamp() * 1000) + if invert: + tempdata = list( + map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol))) + else: + tempdata = list( + map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol))) + if tempdata: + for stamp in batch: + # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) + # times do not always line up perfectly so take one nearest + ts = list( + map(lambda x: (abs(stamp.timestamp()*1000-x[0]), x), tempdata)) + print(min(ts, key=lambda x: x[0])[0]/1000) + data.append((stamp, min(ts, key=lambda x: x[0])[1][1])) + return data + + def preload_price_data(self, operations, coin): + + reference_coin = config.FIAT + lis = self._get_binance_bulk_pair_list(reference_coin, coin) + db_path = self.get_db_path("binance") + operations_filtered = [] + tablename = self.get_tablename(coin, reference_coin) + + if lis: + for operation in operations: + if not self.__get_price_db(db_path, 
tablename, operation.utc_time): + operations_filtered.append(operation) + if len(lis) == 1 and lis[0]: + data = self._get_binance_bulk_pair_data( + operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) + for element in data: + self.__set_price_db(db_path, tablename, + element[0], element[1]) + elif len(lis) == 2 and lis[0] and lis[1]: + data = self._get_binance_bulk_pair_data( + operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) + data2 = self._get_binance_bulk_pair_data( + operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2]) + for element in data: + factor = None + for element2 in data2: + if element[0] == element2[0]: + factor = element2[1] + break + if factor: + price = element[1]*factor + if not self.__get_price_db(db_path, tablename, element[0]): + self.__set_price_db( + db_path, tablename, element[0], price) diff --git a/src/taxman.py b/src/taxman.py index 056ac3f0..58fd5aea 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -45,6 +45,8 @@ def __init__(self, book: Book, price_data: PriceData) -> None: self.__evaluate_taxation = getattr(self, f"_evaluate_taxation_{country}") except AttributeError: raise NotImplementedError(f"Unable to evaluate taxation for {country=}.") + + self.price_data.initialize_ccxt() if config.PRINCIPLE == core.Principle.FIFO: self.BalanceType = balance_queue.BalanceQueue @@ -178,6 +180,7 @@ def evaluate_taxation(self) -> None: log.debug("Starting evaluation...") for coin, operations in misc.group_by(self.book.operations, "coin").items(): operations = sorted(operations, key=lambda op: op.utc_time) + self.price_data.preload_price_data(operations,coin) self.__evaluate_taxation(coin, operations) def print_evaluation(self) -> None: From 34f0c66245861b01b5a4b226ebbb8d17466f8c64 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 5 Mar 2021 18:32:58 +0100 Subject: [PATCH 02/53] reformatting and documentation --- src/price_data.py | 57 
+++++++++++++++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index c471b070..6ef65338 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -420,11 +420,12 @@ def get_cost( def get_candles(self, start, stop, symbol): if self.exchange.has['fetchOHLCV']: sleep(self.exchange.rateLimit / 1000) # time.sleep wants seconds - print(f"get {max(int((stop-start)/1000/60)+2,1)} rows") # get 2min before and after range return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1)) else: - raise Exception + logging.warning( + "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv") + raise None def initialize_ccxt(self): exchange_id = 'binance' @@ -432,94 +433,116 @@ def initialize_ccxt(self): self.exchange = exchange_class() self.markets = [] markets = self.exchange.fetch_markets() + for market in markets: # may not apply for all exchanges, currently works for binance + # caches a list of all pairs on the exchange self.markets.append(market["symbol"].split("/")) def _get_binance_bulk_pair_list(self, reference_coin, coin): + def get_pair(coin, reference_coin): + for market in self.markets: if market[0] == coin and market[1] == reference_coin: - return [coin, reference_coin, False] + return [coin, reference_coin, False] # False=not inverted elif market[1] == coin and market[0] == reference_coin: - return [reference_coin, coin, True] + return [reference_coin, coin, True] # True=inverted pair = get_pair(coin, reference_coin) if not pair: + for market in self.markets: - if market[0] == coin: - pair = get_pair(market[1], reference_coin) - if pair: + pair = get_pair(market[1], reference_coin) + + if pair: + if market[0] == coin: return [[market[0], market[1], False], pair] - elif market[1] == coin: - pair = get_pair(market[1], reference_coin) - if pair: + elif market[1] == coin: return [[market[1], market[0], True], pair] - return [pair, 
] + else: + return [pair, ] def _get_binance_bulk_pair_data(self, operations, symbol, invert=False): timestamps = [] timestamppairs = [] - counter = 0 data = [] + for op in operations: timestamps.append(op.utc_time) + while len(timestamps) > 0: timestamp = timestamps.pop(0) + if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp: timestamppairs[-1].append(timestamp) else: timestamppairs.append([timestamp, ]) + for batch in timestamppairs: + # ccxt works with timestamps in milliseconds last = int(max(batch).timestamp() * 1000) first = int(min(batch).timestamp() * 1000) + if invert: tempdata = list( map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol))) else: tempdata = list( map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol))) + if tempdata: - for stamp in batch: + for operation in batch: # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) # times do not always line up perfectly so take one nearest ts = list( - map(lambda x: (abs(stamp.timestamp()*1000-x[0]), x), tempdata)) - print(min(ts, key=lambda x: x[0])[0]/1000) - data.append((stamp, min(ts, key=lambda x: x[0])[1][1])) + map(lambda x: (abs(operation.timestamp()*1000-x[0]), x), tempdata)) + data.append((operation, min(ts, key=lambda x: x[0])[1][1])) return data def preload_price_data(self, operations, coin): - + reference_coin = config.FIAT + # get pairs used for calculating the price lis = self._get_binance_bulk_pair_list(reference_coin, coin) db_path = self.get_db_path("binance") operations_filtered = [] tablename = self.get_tablename(coin, reference_coin) if lis: + for operation in operations: if not self.__get_price_db(db_path, tablename, operation.utc_time): operations_filtered.append(operation) + + # len 1== direct pairing with base currency if len(lis) == 1 and lis[0]: data = self._get_binance_bulk_pair_data( operations_filtered, 
f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) for element in data: self.__set_price_db(db_path, tablename, element[0], element[1]) + + # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR elif len(lis) == 2 and lis[0] and lis[1]: + # get data for first pair data = self._get_binance_bulk_pair_data( operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) + # get data for second pair data2 = self._get_binance_bulk_pair_data( operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2]) + for element in data: factor = None + for element2 in data2: if element[0] == element2[0]: factor = element2[1] break + if factor: price = element[1]*factor + # check if timestamp already exists to prevent a duplicate error if not self.__get_price_db(db_path, tablename, element[0]): self.__set_price_db( db_path, tablename, element[0], price) From 777c44172785eeb0163d0cf0fde3e1e103f7fe33 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 5 Mar 2021 18:39:35 +0100 Subject: [PATCH 03/53] added type hints --- src/price_data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 6ef65338..a435d255 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -417,7 +417,7 @@ def get_cost( return price * tr.sold raise NotImplementedError - def get_candles(self, start, stop, symbol): + def get_candles(self, start: int, stop: int, symbol: str) ->list: if self.exchange.has['fetchOHLCV']: sleep(self.exchange.rateLimit / 1000) # time.sleep wants seconds # get 2min before and after range @@ -433,15 +433,15 @@ def initialize_ccxt(self): self.exchange = exchange_class() self.markets = [] markets = self.exchange.fetch_markets() - + for market in markets: # may not apply for all exchanges, currently works for binance # caches a list of all pairs on the exchange self.markets.append(market["symbol"].split("/")) - def _get_binance_bulk_pair_list(self, reference_coin, coin): + def 
_get_binance_bulk_pair_list(self, reference_coin: str = config.FIAT, coin) -> list: - def get_pair(coin, reference_coin): + def get_pair(coin, reference_coin:str): for market in self.markets: if market[0] == coin and market[1] == reference_coin: @@ -463,7 +463,7 @@ def get_pair(coin, reference_coin): else: return [pair, ] - def _get_binance_bulk_pair_data(self, operations, symbol, invert=False): + def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list: timestamps = [] timestamppairs = [] data = [] @@ -500,7 +500,7 @@ def _get_binance_bulk_pair_data(self, operations, symbol, invert=False): data.append((operation, min(ts, key=lambda x: x[0])[1][1])) return data - def preload_price_data(self, operations, coin): + def preload_price_data(self, operations: list, coin: str): reference_coin = config.FIAT # get pairs used for calculating the price From a1b9f8e6e8a09d84c9c2a48042001969cf55a81a Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Wed, 31 Mar 2021 15:18:36 +0200 Subject: [PATCH 04/53] added walrus operator --- src/price_data.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index a435d255..1347dcb0 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -449,9 +449,10 @@ def get_pair(coin, reference_coin:str): elif market[1] == coin and market[0] == reference_coin: return [reference_coin, coin, True] # True=inverted - pair = get_pair(coin, reference_coin) - if not pair: + if pair := get_pair(coin, reference_coin) + return [pair, ] + else: for market in self.markets: pair = get_pair(market[1], reference_coin) @@ -460,8 +461,6 @@ def get_pair(coin, reference_coin:str): return [[market[0], market[1], False], pair] elif market[1] == coin: return [[market[1], market[0], True], pair] - else: - return [pair, ] def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list: timestamps = [] From 
5089a9e192aa84d189752a9be1134c8df69e7473 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Wed, 31 Mar 2021 15:42:32 +0200 Subject: [PATCH 05/53] fixes smaller issues --- src/price_data.py | 67 ++++++++++++++++++++++------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 1347dcb0..d1f7ef47 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -439,50 +439,49 @@ def initialize_ccxt(self): # caches a list of all pairs on the exchange self.markets.append(market["symbol"].split("/")) - def _get_binance_bulk_pair_list(self, reference_coin: str = config.FIAT, coin) -> list: - + def _get_bulk_pair_list(self, coin,reference_coin: str = config.FIAT) -> list: + def cmp_asset_pairs(our_pair: tuple[str, str], market_pair: tuple[str, str]) -> Optional[tuple[str, str, bool]]: + if our_pair == market_pair: + return *market_pair, False + if reversed(our_pair) == market_pair: + return *market_pair, True + return None + def get_pair(coin, reference_coin:str): - + our_symbols = [coin, reference_coin] for market in self.markets: - if market[0] == coin and market[1] == reference_coin: - return [coin, reference_coin, False] # False=not inverted - elif market[1] == coin and market[0] == reference_coin: - return [reference_coin, coin, True] # True=inverted + if cmp := cmp_asset_pairs(our_symbols, market): + return cmp - if pair := get_pair(coin, reference_coin) - return [pair, ] + if pair := get_pair(coin, reference_coin): + return [pair] else: for market in self.markets: - pair = get_pair(market[1], reference_coin) - - if pair: + if pair:=get_pair(market[1], reference_coin): if market[0] == coin: - return [[market[0], market[1], False], pair] - elif market[1] == coin: - return [[market[1], market[0], True], pair] + return [(*market, False), pair] + if market[1] == coin: + return [(*market, True), pair] - def _get_binance_bulk_pair_data(self, operations: list, symbol: 
str, invert: str=False) ->list: + def _get_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list: timestamps = [] timestamppairs = [] data = [] - for op in operations: - timestamps.append(op.utc_time) + timestamps = (op.utc_time for op in operations) - while len(timestamps) > 0: - timestamp = timestamps.pop(0) + for timestamp in timestamps: if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp: timestamppairs[-1].append(timestamp) else: - timestamppairs.append([timestamp, ]) + timestamppairs.append([timestamp]) for batch in timestamppairs: # ccxt works with timestamps in milliseconds - last = int(max(batch).timestamp() * 1000) - first = int(min(batch).timestamp() * 1000) - + first = misc.to_ms_timestamp(batch[0]) + last = misc.to_ms_timestamp(batch[-1]) if invert: tempdata = list( map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol))) @@ -495,7 +494,7 @@ def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) # times do not always line up perfectly so take one nearest ts = list( - map(lambda x: (abs(operation.timestamp()*1000-x[0]), x), tempdata)) + map(lambda x: (abs(misc.to_ms_timestamp(operation.timestamp)*1000-x[0]), x), tempdata)) data.append((operation, min(ts, key=lambda x: x[0])[1][1])) return data @@ -503,20 +502,17 @@ def preload_price_data(self, operations: list, coin: str): reference_coin = config.FIAT # get pairs used for calculating the price - lis = self._get_binance_bulk_pair_list(reference_coin, coin) db_path = self.get_db_path("binance") operations_filtered = [] tablename = self.get_tablename(coin, reference_coin) - if lis: + if lis:=self._get_bulk_pair_list(coin,reference_coin): - for operation in operations: - if not self.__get_price_db(db_path, tablename, operation.utc_time): - 
operations_filtered.append(operation) + operations_filtered = [op for op in operations if not self.__get_price_db(db_path, tablename, op.utc_time)] # len 1== direct pairing with base currency if len(lis) == 1 and lis[0]: - data = self._get_binance_bulk_pair_data( + data = self._get_bulk_pair_data( operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) for element in data: self.__set_price_db(db_path, tablename, @@ -525,10 +521,10 @@ def preload_price_data(self, operations: list, coin: str): # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR elif len(lis) == 2 and lis[0] and lis[1]: # get data for first pair - data = self._get_binance_bulk_pair_data( + data = self._get_bulk_pair_data( operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) # get data for second pair - data2 = self._get_binance_bulk_pair_data( + data2 = self._get_bulk_pair_data( operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2]) for element in data: @@ -542,6 +538,5 @@ def preload_price_data(self, operations: list, coin: str): if factor: price = element[1]*factor # check if timestamp already exists to prevent a duplicate error - if not self.__get_price_db(db_path, tablename, element[0]): - self.__set_price_db( - db_path, tablename, element[0], price) + self.set_price_db( + db_path, tablename, element[0], price) From 1ac3e7769fdbd2f244be482b1c709967a35b1c03 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Wed, 31 Mar 2021 15:48:27 +0200 Subject: [PATCH 06/53] change warning to error --- src/price_data.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index d1f7ef47..54c4708d 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -423,9 +423,9 @@ def get_candles(self, start: int, stop: int, symbol: str) ->list: # get 2min before and after range return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1)) else: - 
logging.warning( + logging.error( "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv") - raise None + return None def initialize_ccxt(self): exchange_id = 'binance' From ed50892286cd9ceee9ea449e5e2b69f8f8ac6a58 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Sun, 4 Apr 2021 22:11:21 +0200 Subject: [PATCH 07/53] PoC for a grpah based solution --- src/graph.py | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 195 insertions(+) create mode 100644 src/graph.py diff --git a/src/graph.py b/src/graph.py new file mode 100644 index 00000000..77076857 --- /dev/null +++ b/src/graph.py @@ -0,0 +1,195 @@ +import ccxt +from datetime import datetime +from time import sleep, time_ns + + +class graph: + + def __init__(self, gdict=None, cache=None): + if not gdict: + gdict = {} + if not cache: + cache = {} + self.gdict = gdict + self.cache = cache + + def edges(self): + return self.findedges() +# Find the distinct list of edges + + def findedges(self): + edgename = [] + for vrtx in self.gdict: + for nxtvrtx in self.gdict[vrtx]: + if {nxtvrtx, vrtx} not in edgename: + edgename.append({vrtx, nxtvrtx}) + return edgename + + def getVertices(self): + return list(self.gdict.keys()) + +# Add the vertex as a key + def addVertex(self, vrtx): + if vrtx not in self.gdict: + self.gdict[vrtx] = [] + + def addEdge(self, vrtx1, vrtx2, data): + if vrtx1 in self.gdict: + self.gdict[vrtx1].append((vrtx2, data)) + else: + self.gdict[vrtx1] = [vrtx2] + + def _getpath(self, start, stop, maxdepth, depth=0): + paths = [] + if (edges := g.gdict.get(start)) and maxdepth > depth: + for edge in edges: + if depth == 0 and edge[0] == stop: + paths.append([edge, ]) + elif edge[0] == stop: + paths.append(edge) + else: + path = self._getpath( + edge[0], stop, maxdepth, depth=depth+1) + if len(path) and path is not None: + for p in path: + if p[0] == stop: + newpath = [edge, ] + newpath.append(p) + paths.append(newpath) + #if 
len(paths)>3 and depth in [0,1]: + # print(len(paths)) + return paths + + def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3): + def comb_sort_key(path): + if preferredexchange: + # prioritze pairs with the preferred exchange + return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + else: + return len(path) + + def check_cache(pair): + + if pair[1].get("starttime") or pair[1].get("stoptime"): + return True, pair + if cacheres := self.cache.get(pair[1]["exchange"]+pair[1]["symbol"]): + pair[1]["starttime"] = cacheres[0] + pair[1]["stoptime"] = cacheres[1] + pair[1]["avg_vol"] = cacheres[2] + return True, pair + return False, pair + + def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): + rangeinms = 0 + timeframe = int(6.048e+8) # week in ms + if starttimestamp == 0: + starttimestamp = 1325372400*1000 + if stoptimestamp == -1: + stoptimestamp = time_ns() // 1_000_000 # get cur time in ms + starttimestamp -= timeframe # to handle edge cases + if stoptimestamp > starttimestamp: + rangeinms = stoptimestamp-starttimestamp + else: + rangeinms = 0 # maybe throw error + + # add one candle to the end to ensure the needed timeslot is in the requested candles + rangeincandles = int(rangeinms/timeframe)+1 + + #todo: cache already used pairs + globalstarttime = 0 + globalstoptime = 0 + for i in range(len(path)): + cached, path[i] = check_cache(path[i]) + if not cached: + exchange_class = getattr(ccxt, path[i][1]["exchange"]) + exchange = exchange_class() + sleep(exchange.rateLimit / 1000) + timeframeexchange = exchange.timeframes.get("1w") + if timeframeexchange: # this must be handled better maybe choose timeframe dynamically + # maybe cache this per pair + ohlcv = exchange.fetch_ohlcv( + path[i][1]["symbol"], "1w", starttimestamp, rangeincandles) + else: + ohlcv = [] # do not check fail later + if len(ohlcv) > 1: + # (candle ends after the date + timeframe) + path[i][1]["stoptime"] 
= ohlcv[-1][0]+timeframe + path[i][1]["avg_vol"] = sum( + [vol[-1] for vol in ohlcv])/len(ohlcv) # avg vol in curr + path[i][1]["starttime"] = ohlcv[0][0] + if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0: + globalstoptime = path[i][1]["stoptime"] + if path[i][1]["starttime"] > globalstarttime: + globalstarttime = path[i][1]["starttime"] + else: + path[i][1]["stoptime"] = 0 + path[i][1]["starttime"] = 0 + path[i][1]["avg_vol"] = 0 + self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = ( + path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"]) + else: + if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0: + globalstoptime = path[i][1]["stoptime"] + if path[i][1]["starttime"] > globalstarttime: + globalstarttime = path[i][1]["starttime"] + ohlcv = [] + print(len(ohlcv)-rangeincandles, rangeincandles) + return (globalstarttime, globalstoptime), path + + # get all possible paths which are no longer than 4 pairs long + paths = self._getpath(start, stop, maxdepth) + # sort by path length to get minimal conversion chain to reduce error + paths = sorted(paths, key=comb_sort_key) + #get timeframe in which a path is viable + for path in paths: + timest, newpath = get_active_timeframe(path) + # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. 
if the first + if starttime == 0 and stoptime == 0: + yield timest, newpath + elif starttime == 0: + if stoptime < timest[1]: + yield timest, newpath + elif stoptime == 0: + if starttime > timest[0]: + yield timest, newpath + else: + if stoptime < timest[1] and starttime > timest[0]: + yield timest, newpath + + +if __name__ == "__main__": + g = graph() + allpairs = [] + for exchange_id in ["binance", "coinbase", "kraken", "coinbasepro", "aax", "bittrex", "bitvavo"]: + exchange_class = getattr(ccxt, exchange_id) + exchange = exchange_class() + markets = [] + markets = exchange.fetch_markets() + if exchange.has['fetchOHLCV']: + + allpairs.extend( + [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets]) + else: + print( + f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.") + #print(len([(i["base"],i["quote"],exchange_id,i["symbol"])for i in markets]),len(markets)) + allpairs = list(set(allpairs)) + print("Total Pairs to check:", len(allpairs)) + for i in allpairs: + base = i[0] + quote = i[1] + g.addVertex(base) + g.addVertex(quote) + g.addEdge(base, quote, { + "exchange": i[2], "symbol": i[3], "inverted": False}) + g.addEdge(quote, base, { + "exchange": i[2], "symbol": i[3], "inverted": True}) + + start = "IOTA" + to = "EUR" + preferredexchange = "binance" + path = g.getpath(start, to, maxdepth=2, + preferredexchange=preferredexchange) + #debug only in actual use we would iterate over the path object fetching new paths as needed + path = list(path) + print(len(path)) From 657666baa7e2dfd3cade03ce2d3d456246cc1740 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 09:42:32 +0200 Subject: [PATCH 08/53] working graph implementation --- src/balance_queue.py | 2 +- src/graph.py | 98 ++++++++++++++-------- src/log_config.py | 1 + src/price_data.py | 194 +++++++++++++++++++++---------------------- src/taxman.py | 4 +- 5 files changed, 163 insertions(+), 136 
deletions(-) diff --git a/src/balance_queue.py b/src/balance_queue.py index a8c8594e..d98aa942 100644 --- a/src/balance_queue.py +++ b/src/balance_queue.py @@ -125,7 +125,7 @@ def sell(self, change: decimal.Decimal) -> Optional[list[transaction.SoldCoin]]: return None not_sold = bop.op.change - bop.sold - assert not_sold > 0 + assert not_sold >= 0 if not_sold > change: bop.sold += change diff --git a/src/graph.py b/src/graph.py index 77076857..fe80af69 100644 --- a/src/graph.py +++ b/src/graph.py @@ -3,15 +3,44 @@ from time import sleep, time_ns -class graph: +class PricePath: - def __init__(self, gdict=None, cache=None): + def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None): if not gdict: gdict = {} if not cache: cache = {} + if not exchanges: + exchanges = ["binance","coinbasepro"] self.gdict = gdict self.cache = cache + self.priority={} + allpairs=[] + + for exchange_id in exchanges: + exchange_class = getattr(ccxt, exchange_id) + exchange = exchange_class() + markets = [] + markets = exchange.fetch_markets() + if exchange.has['fetchOHLCV']: + + allpairs.extend( + [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets]) + else: + print( + f"{exchange.name} Does not support fetch ohlcv. 
ignoring exchange and {len(markets)} pairs.") + allpairs = list(set(allpairs)) + #print("Total Pairs to check:", len(allpairs)) + allpairs.sort(key=lambda x: x[3]) + for i in allpairs: + base = i[0] + quote = i[1] + self.addVertex(base) + self.addVertex(quote) + self.addEdge(base, quote, { + "exchange": i[2], "symbol": i[3], "inverted": False}) + self.addEdge(quote, base, { + "exchange": i[2], "symbol": i[3], "inverted": True}) def edges(self): return self.findedges() @@ -41,7 +70,7 @@ def addEdge(self, vrtx1, vrtx2, data): def _getpath(self, start, stop, maxdepth, depth=0): paths = [] - if (edges := g.gdict.get(start)) and maxdepth > depth: + if (edges := self.gdict.get(start)) and maxdepth > depth: for edge in edges: if depth == 0 and edge[0] == stop: paths.append([edge, ]) @@ -56,15 +85,36 @@ def _getpath(self, start, stop, maxdepth, depth=0): newpath = [edge, ] newpath.append(p) paths.append(newpath) - #if len(paths)>3 and depth in [0,1]: - # print(len(paths)) return paths + def change_prio(self,key,value): + ke="-".join(key) + if self.priority.get(ke): + self.priority[ke]+=value + else: + self.priority[ke]=value + def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3): def comb_sort_key(path): if preferredexchange: # prioritze pairs with the preferred exchange - return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + volume=1 + volumenew=0 + if not (priority:=self.priority.get("-".join([ a[1]["symbol"] for a in path]))): + priority=0 + for c in [a if (a := check_cache(pair)) else None for pair in path]: + if c and c[0]: + if c[1][1]["stoptime"]==0: + break + elif c[1][1]["avg_vol"]!=0: + volumenew+=c[1][1]["avg_vol"] #is very much off because volume is not in the same currency something for later + + + else: + break + else: + volume=1/volumenew + return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])+volume+priority else: return len(path) @@ 
-103,7 +153,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if not cached: exchange_class = getattr(ccxt, path[i][1]["exchange"]) exchange = exchange_class() - sleep(exchange.rateLimit / 1000) + sleep(exchange.rateLimit / 1000) #maybe a more elaborate ratelimit wich counts execution time to waiting timeframeexchange = exchange.timeframes.get("1w") if timeframeexchange: # this must be handled better maybe choose timeframe dynamically # maybe cache this per pair @@ -128,12 +178,12 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = ( path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"]) else: - if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0: + + if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"]!=0: globalstoptime = path[i][1]["stoptime"] - if path[i][1]["starttime"] > globalstarttime: + if path[i][1]["starttime"] > globalstarttime : globalstarttime = path[i][1]["starttime"] ohlcv = [] - print(len(ohlcv)-rangeincandles, rangeincandles) return (globalstarttime, globalstoptime), path # get all possible paths which are no longer than 4 pairs long @@ -152,39 +202,15 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): elif stoptime == 0: if starttime > timest[0]: yield timest, newpath + else: if stoptime < timest[1] and starttime > timest[0]: yield timest, newpath if __name__ == "__main__": - g = graph() + g = PricePath() allpairs = [] - for exchange_id in ["binance", "coinbase", "kraken", "coinbasepro", "aax", "bittrex", "bitvavo"]: - exchange_class = getattr(ccxt, exchange_id) - exchange = exchange_class() - markets = [] - markets = exchange.fetch_markets() - if exchange.has['fetchOHLCV']: - - allpairs.extend( - [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets]) - else: - print( - f"{exchange.name} Does not support fetch ohlcv. 
ignoring exchange and {len(markets)} pairs.") - #print(len([(i["base"],i["quote"],exchange_id,i["symbol"])for i in markets]),len(markets)) - allpairs = list(set(allpairs)) - print("Total Pairs to check:", len(allpairs)) - for i in allpairs: - base = i[0] - quote = i[1] - g.addVertex(base) - g.addVertex(quote) - g.addEdge(base, quote, { - "exchange": i[2], "symbol": i[3], "inverted": False}) - g.addEdge(quote, base, { - "exchange": i[2], "symbol": i[3], "inverted": True}) - start = "IOTA" to = "EUR" preferredexchange = "binance" diff --git a/src/log_config.py b/src/log_config.py index 9cf94aca..d480c108 100644 --- a/src/log_config.py +++ b/src/log_config.py @@ -35,3 +35,4 @@ # Disable urllib debug messages logging.getLogger("urllib3").propagate = False +logging.getLogger("ccxt").propagate = False diff --git a/src/price_data.py b/src/price_data.py index 54c4708d..268802a7 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -32,6 +32,7 @@ import misc import transaction from core import kraken_pair_map +from graph import PricePath log = logging.getLogger(__name__) @@ -43,6 +44,9 @@ class PriceData: + def __init__(self): + self.path=PricePath() + def get_db_path(self, platform: str) -> Path: return Path(config.DATA_PATH, f"{platform}.db") @@ -417,126 +421,122 @@ def get_cost( return price * tr.sold raise NotImplementedError - def get_candles(self, start: int, stop: int, symbol: str) ->list: - if self.exchange.has['fetchOHLCV']: - sleep(self.exchange.rateLimit / 1000) # time.sleep wants seconds + def get_candles(self, start: int, stop: int, symbol: str,exchange: str) ->list: + exchange_class = getattr(ccxt, exchange) + exchange = exchange_class() + if exchange.has['fetchOHLCV']: + sleep(exchange.rateLimit / 1000) # time.sleep wants seconds # get 2min before and after range - return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1)) + startval=start-1000*60*2 + rang=max(int((stop-start)/1000/60)+2, 1) + return 
exchange.fetch_ohlcv(symbol, '1m', startval, rang ) else: - logging.error( + log.error( "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv") return None - def initialize_ccxt(self): - exchange_id = 'binance' - exchange_class = getattr(ccxt, exchange_id) - self.exchange = exchange_class() - self.markets = [] - markets = self.exchange.fetch_markets() - - for market in markets: - # may not apply for all exchanges, currently works for binance - # caches a list of all pairs on the exchange - self.markets.append(market["symbol"].split("/")) - - def _get_bulk_pair_list(self, coin,reference_coin: str = config.FIAT) -> list: - def cmp_asset_pairs(our_pair: tuple[str, str], market_pair: tuple[str, str]) -> Optional[tuple[str, str, bool]]: - if our_pair == market_pair: - return *market_pair, False - if reversed(our_pair) == market_pair: - return *market_pair, True - return None - - def get_pair(coin, reference_coin:str): - our_symbols = [coin, reference_coin] - for market in self.markets: - if cmp := cmp_asset_pairs(our_symbols, market): - return cmp - - if pair := get_pair(coin, reference_coin): - return [pair] + def _get_bulk_pair_data_path(self, operations: list, coin: str,reference_coin: str,preferredexchange:str="binance") ->list: + def merge_prices(a:list,b:list=None): + prices=[] + if not b : + return a + for i in a: + factor=None + for j in b: + if i[0]==j[0]: + factor=j[1] + break + prices.append((i[0],i[1]*factor)) + return prices - else: - for market in self.markets: - if pair:=get_pair(market[1], reference_coin): - if market[0] == coin: - return [(*market, False), pair] - if market[1] == coin: - return [(*market, True), pair] - - def _get_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list: timestamps = [] timestamppairs = [] - data = [] - + maxminutes=300 #coinbasepro only allows a max of 300 minutes need a better solution timestamps = (op.utc_time for op in operations) + if not preferredexchange: + 
preferredexchange="binance" for timestamp in timestamps: - if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp: + if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=maxminutes-4) > timestamp: timestamppairs[-1].append(timestamp) else: timestamppairs.append([timestamp]) - + datacomb=[] for batch in timestamppairs: # ccxt works with timestamps in milliseconds first = misc.to_ms_timestamp(batch[0]) last = misc.to_ms_timestamp(batch[-1]) - if invert: - tempdata = list( - map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol))) - else: - tempdata = list( - map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol))) - - if tempdata: - for operation in batch: - # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) - # times do not always line up perfectly so take one nearest - ts = list( - map(lambda x: (abs(misc.to_ms_timestamp(operation.timestamp)*1000-x[0]), x), tempdata)) - data.append((operation, min(ts, key=lambda x: x[0])[1][1])) - return data - - def preload_price_data(self, operations: list, coin: str): + firststr=batch[0].strftime('%d-%b-%Y (%H:%M)') + laststr=batch[-1].strftime('%d-%b-%Y (%H:%M)') + log.info(f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}") + path=self.path.getpath(coin,reference_coin,first,last,preferredexchange=preferredexchange) + for p in path: + tempdatalis=[] + printstr=[ a[1]["symbol"] for a in p[1] ] + log.debug(f"found path over {' -> '.join(printstr)}") + for i in range(len(p[1])): + tempdatalis.append([]) + symbol=p[1][i][1]["symbol"] + exchange=p[1][i][1]["exchange"] + invert=p[1][i][1]["inverted"] + candles=self.get_candles(first, last, symbol,exchange) + if invert: + tempdata = list( + map(lambda x: (x[0], 1/((x[1]+x[4])/2)), candles)) + else: + tempdata = list( + map(lambda x: (x[0], (x[1]+x[4])/2), candles)) + + if 
tempdata: + for operation in batch: + # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) + # times do not always line up perfectly so take one nearest + ts = list( + map(lambda x: (abs(misc.to_ms_timestamp(operation)*1000-x[0]), x), tempdata)) + tempdatalis[i].append((operation, min(ts, key=lambda x: x[0])[1][1])) + else: + tempdatalis=[] + self.path.change_prio(printstr,0.2) # do not try already failed again + break + if tempdatalis: + wantedlen=len(tempdatalis[0]) + for li in tempdatalis: + if not len(li)==wantedlen: + self.path.change_prio(printstr,0.2) + break + else: + prices=[] + for d in tempdatalis: + prices=merge_prices(d,prices) + datacomb.extend(prices) + break + log.debug("path failed trying new path") + + return datacomb + + def preload_price_data_path(self,operations: list, coin: str,exchange:str=None): + + reference_coin = config.FIAT # get pairs used for calculating the price - db_path = self.get_db_path("binance") operations_filtered = [] + tablename = self.get_tablename(coin, reference_coin) + operations_filtered = [op for op in operations if not self.__get_price_db(self.get_db_path(op.platform), tablename, op.utc_time)] + operations_grouped={} + if operations_filtered: + for i in operations_filtered: + if i.coin==config.FIAT: + pass + elif operations_grouped.get(i.platform): + operations_grouped[i.platform].append(i) + else: + operations_grouped[i.platform]=[i] + for platf in operations_grouped.keys(): + data=self._get_bulk_pair_data_path(operations_grouped[platf],coin,reference_coin,preferredexchange=platf) + for p in data: + self.set_price_db(platf,coin,reference_coin, p[0], p[1]) - if lis:=self._get_bulk_pair_list(coin,reference_coin): - - operations_filtered = [op for op in operations if not self.__get_price_db(db_path, tablename, op.utc_time)] - - # len 1== direct pairing with base currency - if len(lis) == 1 and lis[0]: - data = self._get_bulk_pair_data( - 
operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) - for element in data: - self.__set_price_db(db_path, tablename, - element[0], element[1]) - - # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR - elif len(lis) == 2 and lis[0] and lis[1]: - # get data for first pair - data = self._get_bulk_pair_data( - operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2]) - # get data for second pair - data2 = self._get_bulk_pair_data( - operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2]) - - for element in data: - factor = None - - for element2 in data2: - if element[0] == element2[0]: - factor = element2[1] - break - if factor: - price = element[1]*factor - # check if timestamp already exists to prevent a duplicate error - self.set_price_db( - db_path, tablename, element[0], price) diff --git a/src/taxman.py b/src/taxman.py index 58fd5aea..1e3e7e75 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -46,7 +46,7 @@ def __init__(self, book: Book, price_data: PriceData) -> None: except AttributeError: raise NotImplementedError(f"Unable to evaluate taxation for {country=}.") - self.price_data.initialize_ccxt() + if config.PRINCIPLE == core.Principle.FIFO: self.BalanceType = balance_queue.BalanceQueue @@ -180,7 +180,7 @@ def evaluate_taxation(self) -> None: log.debug("Starting evaluation...") for coin, operations in misc.group_by(self.book.operations, "coin").items(): operations = sorted(operations, key=lambda op: op.utc_time) - self.price_data.preload_price_data(operations,coin) + self.price_data.preload_price_data_path(operations,coin) self.__evaluate_taxation(coin, operations) def print_evaluation(self) -> None: From 74935e1713f1260af3e539c43000a92540c7f795 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 10:03:42 +0200 Subject: [PATCH 09/53] better batching implementation --- src/price_data.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/price_data.py 
b/src/price_data.py index 268802a7..f8b4d9f6 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -456,11 +456,12 @@ def merge_prices(a:list,b:list=None): if not preferredexchange: preferredexchange="binance" + current_first = None for timestamp in timestamps: - - if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=maxminutes-4) > timestamp: + if current_first and current_first+datetime.timedelta(minutes=maxminutes-4) > timestamp: timestamppairs[-1].append(timestamp) else: + current_first = timestamp timestamppairs.append([timestamp]) datacomb=[] for batch in timestamppairs: From 48c24c8ce447b609a3c01fc3b5cb393d0275a5dc Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 10:18:14 +0200 Subject: [PATCH 10/53] revert accidental change --- src/balance_queue.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/balance_queue.py b/src/balance_queue.py index d98aa942..a8c8594e 100644 --- a/src/balance_queue.py +++ b/src/balance_queue.py @@ -125,7 +125,7 @@ def sell(self, change: decimal.Decimal) -> Optional[list[transaction.SoldCoin]]: return None not_sold = bop.op.change - bop.sold - assert not_sold >= 0 + assert not_sold > 0 if not_sold > change: bop.sold += change From 50378a6997e5fa68edaf21a8d8874fc4f44706b4 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 11:50:30 +0200 Subject: [PATCH 11/53] formatting --- src/graph.py | 75 ++++++++++++++++---------------- src/price_data.py | 106 +++++++++++++++++++++++----------------------- 2 files changed, 92 insertions(+), 89 deletions(-) diff --git a/src/graph.py b/src/graph.py index fe80af69..7e49b596 100644 --- a/src/graph.py +++ b/src/graph.py @@ -5,17 +5,17 @@ class PricePath: - def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None): + def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None): if not gdict: gdict = {} 
if not cache: cache = {} if not exchanges: - exchanges = ["binance","coinbasepro"] + exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority={} - allpairs=[] + self.priority = {} + allpairs = [] for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -38,9 +38,9 @@ def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None): self.addVertex(base) self.addVertex(quote) self.addEdge(base, quote, { - "exchange": i[2], "symbol": i[3], "inverted": False}) + "exchange": i[2], "symbol": i[3], "inverted": False}) self.addEdge(quote, base, { - "exchange": i[2], "symbol": i[3], "inverted": True}) + "exchange": i[2], "symbol": i[3], "inverted": True}) def edges(self): return self.findedges() @@ -59,8 +59,8 @@ def getVertices(self): # Add the vertex as a key def addVertex(self, vrtx): - if vrtx not in self.gdict: - self.gdict[vrtx] = [] + if vrtx not in self.gdict: + self.gdict[vrtx] = [] def addEdge(self, vrtx1, vrtx2, data): if vrtx1 in self.gdict: @@ -78,7 +78,7 @@ def _getpath(self, start, stop, maxdepth, depth=0): paths.append(edge) else: path = self._getpath( - edge[0], stop, maxdepth, depth=depth+1) + edge[0], stop, maxdepth, depth=depth + 1) if len(path) and path is not None: for p in path: if p[0] == stop: @@ -87,34 +87,34 @@ def _getpath(self, start, stop, maxdepth, depth=0): paths.append(newpath) return paths - def change_prio(self,key,value): - ke="-".join(key) + def change_prio(self, key, value): + ke = "-".join(key) if self.priority.get(ke): - self.priority[ke]+=value + self.priority[ke] += value else: - self.priority[ke]=value + self.priority[ke] = value def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3): def comb_sort_key(path): if preferredexchange: # prioritze pairs with the preferred exchange - volume=1 - volumenew=0 - if not (priority:=self.priority.get("-".join([ a[1]["symbol"] for a in path]))): - priority=0 + volume = 1 + volumenew = 0 + if not 
(priority := self.priority.get("-".join([a[1]["symbol"] for a in path]))): + priority = 0 for c in [a if (a := check_cache(pair)) else None for pair in path]: if c and c[0]: - if c[1][1]["stoptime"]==0: + if c[1][1]["stoptime"] == 0: break - elif c[1][1]["avg_vol"]!=0: - volumenew+=c[1][1]["avg_vol"] #is very much off because volume is not in the same currency something for later - + elif c[1][1]["avg_vol"] != 0: + # is very much off because volume is not in the same currency something for later + volumenew += c[1][1]["avg_vol"] else: break else: - volume=1/volumenew - return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])+volume+priority + volume = 1 / volumenew + return len(path) + sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + volume + priority else: return len(path) @@ -122,7 +122,7 @@ def check_cache(pair): if pair[1].get("starttime") or pair[1].get("stoptime"): return True, pair - if cacheres := self.cache.get(pair[1]["exchange"]+pair[1]["symbol"]): + if cacheres := self.cache.get(pair[1]["exchange"] + pair[1]["symbol"]): pair[1]["starttime"] = cacheres[0] pair[1]["stoptime"] = cacheres[1] pair[1]["avg_vol"] = cacheres[2] @@ -133,19 +133,19 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): rangeinms = 0 timeframe = int(6.048e+8) # week in ms if starttimestamp == 0: - starttimestamp = 1325372400*1000 + starttimestamp = 1325372400 * 1000 if stoptimestamp == -1: stoptimestamp = time_ns() // 1_000_000 # get cur time in ms starttimestamp -= timeframe # to handle edge cases if stoptimestamp > starttimestamp: - rangeinms = stoptimestamp-starttimestamp + rangeinms = stoptimestamp - starttimestamp else: rangeinms = 0 # maybe throw error # add one candle to the end to ensure the needed timeslot is in the requested candles - rangeincandles = int(rangeinms/timeframe)+1 + rangeincandles = int(rangeinms / timeframe) + 1 - #todo: cache already used pairs + # todo: cache already used 
pairs globalstarttime = 0 globalstoptime = 0 for i in range(len(path)): @@ -153,7 +153,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if not cached: exchange_class = getattr(ccxt, path[i][1]["exchange"]) exchange = exchange_class() - sleep(exchange.rateLimit / 1000) #maybe a more elaborate ratelimit wich counts execution time to waiting + # maybe a more elaborate ratelimit wich counts execution time to waiting + sleep(exchange.rateLimit / 1000) timeframeexchange = exchange.timeframes.get("1w") if timeframeexchange: # this must be handled better maybe choose timeframe dynamically # maybe cache this per pair @@ -163,9 +164,9 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): ohlcv = [] # do not check fail later if len(ohlcv) > 1: # (candle ends after the date + timeframe) - path[i][1]["stoptime"] = ohlcv[-1][0]+timeframe + path[i][1]["stoptime"] = ohlcv[-1][0] + timeframe path[i][1]["avg_vol"] = sum( - [vol[-1] for vol in ohlcv])/len(ohlcv) # avg vol in curr + [vol[-1] for vol in ohlcv]) / len(ohlcv) # avg vol in curr path[i][1]["starttime"] = ohlcv[0][0] if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0: globalstoptime = path[i][1]["stoptime"] @@ -175,13 +176,13 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): path[i][1]["stoptime"] = 0 path[i][1]["starttime"] = 0 path[i][1]["avg_vol"] = 0 - self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = ( + self.cache[path[i][1]["exchange"] + path[i][1]["symbol"]] = ( path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"]) else: - if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"]!=0: + if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"] != 0: globalstoptime = path[i][1]["stoptime"] - if path[i][1]["starttime"] > globalstarttime : + if path[i][1]["starttime"] > globalstarttime: globalstarttime = path[i][1]["starttime"] ohlcv = [] return 
(globalstarttime, globalstoptime), path @@ -190,7 +191,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): paths = self._getpath(start, stop, maxdepth) # sort by path length to get minimal conversion chain to reduce error paths = sorted(paths, key=comb_sort_key) - #get timeframe in which a path is viable + # get timeframe in which a path is viable for path in paths: timest, newpath = get_active_timeframe(path) # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. if the first @@ -202,7 +203,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): elif stoptime == 0: if starttime > timest[0]: yield timest, newpath - + else: if stoptime < timest[1] and starttime > timest[0]: yield timest, newpath @@ -216,6 +217,6 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): preferredexchange = "binance" path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange) - #debug only in actual use we would iterate over the path object fetching new paths as needed + # debug only in actual use we would iterate over the path object fetching new paths as needed path = list(path) print(len(path)) diff --git a/src/price_data.py b/src/price_data.py index f8b4d9f6..8c36ab69 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -45,7 +45,7 @@ class PriceData: def __init__(self): - self.path=PricePath() + self.path = PricePath() def get_db_path(self, platform: str) -> Path: return Path(config.DATA_PATH, f"{platform}.db") @@ -420,124 +420,126 @@ def get_cost( if isinstance(tr, transaction.SoldCoin): return price * tr.sold raise NotImplementedError - - def get_candles(self, start: int, stop: int, symbol: str,exchange: str) ->list: + + def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list: exchange_class = getattr(ccxt, exchange) exchange = exchange_class() if exchange.has['fetchOHLCV']: sleep(exchange.rateLimit / 1000) # time.sleep wants seconds # get 
2min before and after range - startval=start-1000*60*2 - rang=max(int((stop-start)/1000/60)+2, 1) - return exchange.fetch_ohlcv(symbol, '1m', startval, rang ) + startval = start - 1000 * 60 * 2 + rang = max(int((stop - start) / 1000 / 60) + 2, 1) + return exchange.fetch_ohlcv(symbol, '1m', startval, rang) else: log.error( "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv") return None - def _get_bulk_pair_data_path(self, operations: list, coin: str,reference_coin: str,preferredexchange:str="binance") ->list: - def merge_prices(a:list,b:list=None): - prices=[] - if not b : + def _get_bulk_pair_data_path(self, operations: list, coin: str, reference_coin: str, preferredexchange: str = "binance") -> list: + def merge_prices(a: list, b: list = None): + prices = [] + if not b: return a for i in a: - factor=None + factor = None for j in b: - if i[0]==j[0]: - factor=j[1] + if i[0] == j[0]: + factor = j[1] break - prices.append((i[0],i[1]*factor)) + prices.append((i[0], i[1] * factor)) return prices timestamps = [] timestamppairs = [] - maxminutes=300 #coinbasepro only allows a max of 300 minutes need a better solution + maxminutes = 300 # coinbasepro only allows a max of 300 minutes need a better solution timestamps = (op.utc_time for op in operations) if not preferredexchange: - preferredexchange="binance" + preferredexchange = "binance" current_first = None for timestamp in timestamps: - if current_first and current_first+datetime.timedelta(minutes=maxminutes-4) > timestamp: + if current_first and current_first + datetime.timedelta(minutes=maxminutes - 4) > timestamp: timestamppairs[-1].append(timestamp) else: current_first = timestamp timestamppairs.append([timestamp]) - datacomb=[] + datacomb = [] for batch in timestamppairs: # ccxt works with timestamps in milliseconds first = misc.to_ms_timestamp(batch[0]) last = misc.to_ms_timestamp(batch[-1]) - firststr=batch[0].strftime('%d-%b-%Y (%H:%M)') - laststr=batch[-1].strftime('%d-%b-%Y (%H:%M)') - 
log.info(f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}") - path=self.path.getpath(coin,reference_coin,first,last,preferredexchange=preferredexchange) + firststr = batch[0].strftime('%d-%b-%Y (%H:%M)') + laststr = batch[-1].strftime('%d-%b-%Y (%H:%M)') + log.info( + f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}") + path = self.path.getpath(coin, reference_coin, first, + last, preferredexchange=preferredexchange) for p in path: - tempdatalis=[] - printstr=[ a[1]["symbol"] for a in p[1] ] + tempdatalis = [] + printstr = [a[1]["symbol"] for a in p[1]] log.debug(f"found path over {' -> '.join(printstr)}") for i in range(len(p[1])): tempdatalis.append([]) - symbol=p[1][i][1]["symbol"] - exchange=p[1][i][1]["exchange"] - invert=p[1][i][1]["inverted"] - candles=self.get_candles(first, last, symbol,exchange) + symbol = p[1][i][1]["symbol"] + exchange = p[1][i][1]["exchange"] + invert = p[1][i][1]["inverted"] + candles = self.get_candles(first, last, symbol, exchange) if invert: tempdata = list( - map(lambda x: (x[0], 1/((x[1]+x[4])/2)), candles)) + map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles)) else: tempdata = list( - map(lambda x: (x[0], (x[1]+x[4])/2), candles)) + map(lambda x: (x[0], (x[1] + x[4]) / 2), candles)) if tempdata: for operation in batch: # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) # times do not always line up perfectly so take one nearest ts = list( - map(lambda x: (abs(misc.to_ms_timestamp(operation)*1000-x[0]), x), tempdata)) - tempdatalis[i].append((operation, min(ts, key=lambda x: x[0])[1][1])) + map(lambda x: (abs(misc.to_ms_timestamp(operation) * 1000 - x[0]), x), tempdata)) + tempdatalis[i].append( + (operation, min(ts, key=lambda x: x[0])[1][1])) else: - tempdatalis=[] - self.path.change_prio(printstr,0.2) # do not try already failed again + tempdatalis = [] + # do not try already failed again + 
self.path.change_prio(printstr, 0.2) break if tempdatalis: - wantedlen=len(tempdatalis[0]) + wantedlen = len(tempdatalis[0]) for li in tempdatalis: - if not len(li)==wantedlen: - self.path.change_prio(printstr,0.2) + if not len(li) == wantedlen: + self.path.change_prio(printstr, 0.2) break else: - prices=[] + prices = [] for d in tempdatalis: - prices=merge_prices(d,prices) + prices = merge_prices(d, prices) datacomb.extend(prices) break log.debug("path failed trying new path") - + return datacomb - def preload_price_data_path(self,operations: list, coin: str,exchange:str=None): - - + def preload_price_data_path(self, operations: list, coin: str, exchange: str = None): reference_coin = config.FIAT # get pairs used for calculating the price operations_filtered = [] - + tablename = self.get_tablename(coin, reference_coin) - operations_filtered = [op for op in operations if not self.__get_price_db(self.get_db_path(op.platform), tablename, op.utc_time)] - operations_grouped={} + operations_filtered = [op for op in operations if not self.__get_price_db( + self.get_db_path(op.platform), tablename, op.utc_time)] + operations_grouped = {} if operations_filtered: for i in operations_filtered: - if i.coin==config.FIAT: + if i.coin == config.FIAT: pass elif operations_grouped.get(i.platform): operations_grouped[i.platform].append(i) else: - operations_grouped[i.platform]=[i] + operations_grouped[i.platform] = [i] for platf in operations_grouped.keys(): - data=self._get_bulk_pair_data_path(operations_grouped[platf],coin,reference_coin,preferredexchange=platf) + data = self._get_bulk_pair_data_path( + operations_grouped[platf], coin, reference_coin, preferredexchange=platf) for p in data: - self.set_price_db(platf,coin,reference_coin, p[0], p[1]) - - + self.set_price_db(platf, coin, reference_coin, p[0], p[1]) From e1f4581162c6c02263bed0dc2c333e648ed29b83 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 12:19:46 +0200 
Subject: [PATCH 12/53] formatting --- src/graph.py | 95 ++++++++++++++++++++++++++++++++--------------- src/price_data.py | 81 +++++++++++++++++++++++++++++----------- src/taxman.py | 2 +- 3 files changed, 127 insertions(+), 51 deletions(-) diff --git a/src/graph.py b/src/graph.py index 7e49b596..f9d98271 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,10 +1,10 @@ -import ccxt from datetime import datetime from time import sleep, time_ns +import ccxt -class PricePath: +class PricePath: def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None): if not gdict: gdict = {} @@ -22,29 +22,34 @@ def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = Non exchange = exchange_class() markets = [] markets = exchange.fetch_markets() - if exchange.has['fetchOHLCV']: + if exchange.has["fetchOHLCV"]: allpairs.extend( - [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets]) + [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets] + ) else: print( - f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.") + f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs." 
+ ) allpairs = list(set(allpairs)) - #print("Total Pairs to check:", len(allpairs)) + # print("Total Pairs to check:", len(allpairs)) allpairs.sort(key=lambda x: x[3]) for i in allpairs: base = i[0] quote = i[1] self.addVertex(base) self.addVertex(quote) - self.addEdge(base, quote, { - "exchange": i[2], "symbol": i[3], "inverted": False}) - self.addEdge(quote, base, { - "exchange": i[2], "symbol": i[3], "inverted": True}) + self.addEdge( + base, quote, {"exchange": i[2], "symbol": i[3], "inverted": False} + ) + self.addEdge( + quote, base, {"exchange": i[2], "symbol": i[3], "inverted": True} + ) def edges(self): return self.findedges() -# Find the distinct list of edges + + # Find the distinct list of edges def findedges(self): edgename = [] @@ -57,7 +62,7 @@ def findedges(self): def getVertices(self): return list(self.gdict.keys()) -# Add the vertex as a key + # Add the vertex as a key def addVertex(self, vrtx): if vrtx not in self.gdict: self.gdict[vrtx] = [] @@ -73,16 +78,21 @@ def _getpath(self, start, stop, maxdepth, depth=0): if (edges := self.gdict.get(start)) and maxdepth > depth: for edge in edges: if depth == 0 and edge[0] == stop: - paths.append([edge, ]) + paths.append( + [ + edge, + ] + ) elif edge[0] == stop: paths.append(edge) else: - path = self._getpath( - edge[0], stop, maxdepth, depth=depth + 1) + path = self._getpath(edge[0], stop, maxdepth, depth=depth + 1) if len(path) and path is not None: for p in path: if p[0] == stop: - newpath = [edge, ] + newpath = [ + edge, + ] newpath.append(p) paths.append(newpath) return paths @@ -94,13 +104,19 @@ def change_prio(self, key, value): else: self.priority[ke] = value - def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3): + def getpath( + self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3 + ): def comb_sort_key(path): if preferredexchange: # prioritze pairs with the preferred exchange volume = 1 volumenew = 0 - if not (priority := 
self.priority.get("-".join([a[1]["symbol"] for a in path]))): + if not ( + priority := self.priority.get( + "-".join([a[1]["symbol"] for a in path]) + ) + ): priority = 0 for c in [a if (a := check_cache(pair)) else None for pair in path]: if c and c[0]: @@ -114,7 +130,17 @@ def comb_sort_key(path): break else: volume = 1 / volumenew - return len(path) + sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + volume + priority + return ( + len(path) + + sum( + [ + 0 if pair[1]["exchange"] == preferredexchange else 1 + for pair in path + ] + ) + + volume + + priority + ) else: return len(path) @@ -131,7 +157,7 @@ def check_cache(pair): def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): rangeinms = 0 - timeframe = int(6.048e+8) # week in ms + timeframe = int(6.048e8) # week in ms if starttimestamp == 0: starttimestamp = 1325372400 * 1000 if stoptimestamp == -1: @@ -156,19 +182,26 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): # maybe a more elaborate ratelimit wich counts execution time to waiting sleep(exchange.rateLimit / 1000) timeframeexchange = exchange.timeframes.get("1w") - if timeframeexchange: # this must be handled better maybe choose timeframe dynamically + if ( + timeframeexchange + ): # this must be handled better maybe choose timeframe dynamically # maybe cache this per pair ohlcv = exchange.fetch_ohlcv( - path[i][1]["symbol"], "1w", starttimestamp, rangeincandles) + path[i][1]["symbol"], "1w", starttimestamp, rangeincandles + ) else: ohlcv = [] # do not check fail later if len(ohlcv) > 1: # (candle ends after the date + timeframe) path[i][1]["stoptime"] = ohlcv[-1][0] + timeframe - path[i][1]["avg_vol"] = sum( - [vol[-1] for vol in ohlcv]) / len(ohlcv) # avg vol in curr + path[i][1]["avg_vol"] = sum([vol[-1] for vol in ohlcv]) / len( + ohlcv + ) # avg vol in curr path[i][1]["starttime"] = ohlcv[0][0] - if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0: + if ( + 
path[i][1]["stoptime"] < globalstoptime + or globalstoptime == 0 + ): globalstoptime = path[i][1]["stoptime"] if path[i][1]["starttime"] > globalstarttime: globalstarttime = path[i][1]["starttime"] @@ -177,10 +210,15 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): path[i][1]["starttime"] = 0 path[i][1]["avg_vol"] = 0 self.cache[path[i][1]["exchange"] + path[i][1]["symbol"]] = ( - path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"]) + path[i][1]["starttime"], + path[i][1]["stoptime"], + path[i][1]["avg_vol"], + ) else: - if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"] != 0: + if ( + path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0 + ) and path[i][1]["stoptime"] != 0: globalstoptime = path[i][1]["stoptime"] if path[i][1]["starttime"] > globalstarttime: globalstarttime = path[i][1]["starttime"] @@ -215,8 +253,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): start = "IOTA" to = "EUR" preferredexchange = "binance" - path = g.getpath(start, to, maxdepth=2, - preferredexchange=preferredexchange) + path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange) # debug only in actual use we would iterate over the path object fetching new paths as needed path = list(path) print(len(path)) diff --git a/src/price_data.py b/src/price_data.py index 8c36ab69..3cd3d8eb 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -22,11 +22,11 @@ import sqlite3 import time from pathlib import Path -from typing import Any, Optional, Union from time import sleep +from typing import Any, Optional, Union -import requests import ccxt +import requests import config import misc @@ -424,18 +424,25 @@ def get_cost( def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list: exchange_class = getattr(ccxt, exchange) exchange = exchange_class() - if exchange.has['fetchOHLCV']: + if exchange.has["fetchOHLCV"]: sleep(exchange.rateLimit / 
1000) # time.sleep wants seconds # get 2min before and after range startval = start - 1000 * 60 * 2 rang = max(int((stop - start) / 1000 / 60) + 2, 1) - return exchange.fetch_ohlcv(symbol, '1m', startval, rang) + return exchange.fetch_ohlcv(symbol, "1m", startval, rang) else: log.error( - "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv") + "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv" + ) return None - def _get_bulk_pair_data_path(self, operations: list, coin: str, reference_coin: str, preferredexchange: str = "binance") -> list: + def _get_bulk_pair_data_path( + self, + operations: list, + coin: str, + reference_coin: str, + preferredexchange: str = "binance", + ) -> list: def merge_prices(a: list, b: list = None): prices = [] if not b: @@ -451,14 +458,20 @@ def merge_prices(a: list, b: list = None): timestamps = [] timestamppairs = [] - maxminutes = 300 # coinbasepro only allows a max of 300 minutes need a better solution + maxminutes = ( + 300 # coinbasepro only allows a max of 300 minutes need a better solution + ) timestamps = (op.utc_time for op in operations) if not preferredexchange: preferredexchange = "binance" current_first = None for timestamp in timestamps: - if current_first and current_first + datetime.timedelta(minutes=maxminutes - 4) > timestamp: + if ( + current_first + and current_first + datetime.timedelta(minutes=maxminutes - 4) + > timestamp + ): timestamppairs[-1].append(timestamp) else: current_first = timestamp @@ -468,12 +481,14 @@ def merge_prices(a: list, b: list = None): # ccxt works with timestamps in milliseconds first = misc.to_ms_timestamp(batch[0]) last = misc.to_ms_timestamp(batch[-1]) - firststr = batch[0].strftime('%d-%b-%Y (%H:%M)') - laststr = batch[-1].strftime('%d-%b-%Y (%H:%M)') + firststr = batch[0].strftime("%d-%b-%Y (%H:%M)") + laststr = batch[-1].strftime("%d-%b-%Y (%H:%M)") log.info( - f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}") - path 
= self.path.getpath(coin, reference_coin, first, - last, preferredexchange=preferredexchange) + f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}" + ) + path = self.path.getpath( + coin, reference_coin, first, last, preferredexchange=preferredexchange + ) for p in path: tempdatalis = [] printstr = [a[1]["symbol"] for a in p[1]] @@ -486,19 +501,32 @@ def merge_prices(a: list, b: list = None): candles = self.get_candles(first, last, symbol, exchange) if invert: tempdata = list( - map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles)) + map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles) + ) else: tempdata = list( - map(lambda x: (x[0], (x[1] + x[4]) / 2), candles)) + map(lambda x: (x[0], (x[1] + x[4]) / 2), candles) + ) if tempdata: for operation in batch: # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) # times do not always line up perfectly so take one nearest ts = list( - map(lambda x: (abs(misc.to_ms_timestamp(operation) * 1000 - x[0]), x), tempdata)) + map( + lambda x: ( + abs( + misc.to_ms_timestamp(operation) * 1000 + - x[0] + ), + x, + ), + tempdata, + ) + ) tempdatalis[i].append( - (operation, min(ts, key=lambda x: x[0])[1][1])) + (operation, min(ts, key=lambda x: x[0])[1][1]) + ) else: tempdatalis = [] # do not try already failed again @@ -520,15 +548,22 @@ def merge_prices(a: list, b: list = None): return datacomb - def preload_price_data_path(self, operations: list, coin: str, exchange: str = None): + def preload_price_data_path( + self, operations: list, coin: str, exchange: str = None + ): reference_coin = config.FIAT # get pairs used for calculating the price operations_filtered = [] tablename = self.get_tablename(coin, reference_coin) - operations_filtered = [op for op in operations if not self.__get_price_db( - self.get_db_path(op.platform), tablename, op.utc_time)] + operations_filtered = [ + op + for op in operations + if not 
self.__get_price_db( + self.get_db_path(op.platform), tablename, op.utc_time + ) + ] operations_grouped = {} if operations_filtered: for i in operations_filtered: @@ -540,6 +575,10 @@ def preload_price_data_path(self, operations: list, coin: str, exchange: str = N operations_grouped[i.platform] = [i] for platf in operations_grouped.keys(): data = self._get_bulk_pair_data_path( - operations_grouped[platf], coin, reference_coin, preferredexchange=platf) + operations_grouped[platf], + coin, + reference_coin, + preferredexchange=platf, + ) for p in data: self.set_price_db(platf, coin, reference_coin, p[0], p[1]) diff --git a/src/taxman.py b/src/taxman.py index 1e3e7e75..6ec05165 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -180,7 +180,7 @@ def evaluate_taxation(self) -> None: log.debug("Starting evaluation...") for coin, operations in misc.group_by(self.book.operations, "coin").items(): operations = sorted(operations, key=lambda op: op.utc_time) - self.price_data.preload_price_data_path(operations,coin) + self.price_data.preload_price_data_path(operations, coin) self.__evaluate_taxation(coin, operations) def print_evaluation(self) -> None: From 95deecc41d4dd9807153da2be4ffe8655e889f95 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 12:25:18 +0200 Subject: [PATCH 13/53] fixed some mypy bugs --- src/graph.py | 9 ++------- src/price_data.py | 8 ++++---- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/src/graph.py b/src/graph.py index f9d98271..8cc222dc 100644 --- a/src/graph.py +++ b/src/graph.py @@ -5,16 +5,12 @@ class PricePath: - def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None): - if not gdict: - gdict = {} - if not cache: - cache = {} + def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): if not exchanges: exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority = {} + self.priority : dict= {} 
allpairs = [] for exchange_id in exchanges: @@ -249,7 +245,6 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if __name__ == "__main__": g = PricePath() - allpairs = [] start = "IOTA" to = "EUR" preferredexchange = "binance" diff --git a/src/price_data.py b/src/price_data.py index 3cd3d8eb..c1b4711b 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -423,13 +423,13 @@ def get_cost( def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list: exchange_class = getattr(ccxt, exchange) - exchange = exchange_class() - if exchange.has["fetchOHLCV"]: - sleep(exchange.rateLimit / 1000) # time.sleep wants seconds + exchange_obj = exchange_class() + if exchange_obj.has["fetchOHLCV"]: + sleep(exchange_obj.rateLimit / 1000) # time.sleep wants seconds # get 2min before and after range startval = start - 1000 * 60 * 2 rang = max(int((stop - start) / 1000 / 60) + 2, 1) - return exchange.fetch_ohlcv(symbol, "1m", startval, rang) + return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang)) else: log.error( "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv" From 7976ee651036de18c7a14ef605d7bfedbd5a45af Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 13:31:58 +0200 Subject: [PATCH 14/53] black formatting --- src/graph.py | 2 +- src/taxman.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/graph.py b/src/graph.py index 8cc222dc..23f93db2 100644 --- a/src/graph.py +++ b/src/graph.py @@ -10,7 +10,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority : dict= {} + self.priority: dict = {} allpairs = [] for exchange_id in exchanges: diff --git a/src/taxman.py b/src/taxman.py index 6ec05165..1df6a84e 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -45,8 +45,6 @@ def __init__(self, book: Book, price_data: PriceData) -> None: self.__evaluate_taxation = 
getattr(self, f"_evaluate_taxation_{country}") except AttributeError: raise NotImplementedError(f"Unable to evaluate taxation for {country=}.") - - if config.PRINCIPLE == core.Principle.FIFO: self.BalanceType = balance_queue.BalanceQueue From a0c8df5884789d258810165c91237f6f05559f6d Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 13:58:47 +0200 Subject: [PATCH 15/53] fix mypy --- src/graph.py | 6 +++--- src/price_data.py | 24 ++++++++++++------------ 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/graph.py b/src/graph.py index 23f93db2..70a75946 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,7 +1,7 @@ from datetime import datetime from time import sleep, time_ns -import ccxt +import ccxt #type: ignore class PricePath: @@ -10,7 +10,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority: dict = {} + self.priority : dict= {} allpairs = [] for exchange_id in exchanges: @@ -29,7 +29,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): ) allpairs = list(set(allpairs)) # print("Total Pairs to check:", len(allpairs)) - allpairs.sort(key=lambda x: x[3]) + allpairs.sort(key=lambda x: x[3]) #type: ignore for i in allpairs: base = i[0] quote = i[1] diff --git a/src/price_data.py b/src/price_data.py index c1b4711b..0b8dffd5 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -25,7 +25,7 @@ from time import sleep from typing import Any, Optional, Union -import ccxt +import ccxt #type: ignore import requests import config @@ -434,7 +434,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list log.error( "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv" ) - return None + return [] def _get_bulk_pair_data_path( self, @@ -443,7 +443,7 @@ def _get_bulk_pair_data_path( reference_coin: str, 
preferredexchange: str = "binance", ) -> list: - def merge_prices(a: list, b: list = None): + def merge_prices(a: list, b: list = []) -> list: prices = [] if not b: return a @@ -456,12 +456,12 @@ def merge_prices(a: list, b: list = None): prices.append((i[0], i[1] * factor)) return prices - timestamps = [] - timestamppairs = [] + timestamps: list = [] + timestamppairs: list = [] maxminutes = ( 300 # coinbasepro only allows a max of 300 minutes need a better solution ) - timestamps = (op.utc_time for op in operations) + timestamps = (op.utc_time for op in operations) # type: ignore if not preferredexchange: preferredexchange = "binance" @@ -490,7 +490,7 @@ def merge_prices(a: list, b: list = None): coin, reference_coin, first, last, preferredexchange=preferredexchange ) for p in path: - tempdatalis = [] + tempdatalis: list = [] printstr = [a[1]["symbol"] for a in p[1]] log.debug(f"found path over {' -> '.join(printstr)}") for i in range(len(p[1])): @@ -525,7 +525,7 @@ def merge_prices(a: list, b: list = None): ) ) tempdatalis[i].append( - (operation, min(ts, key=lambda x: x[0])[1][1]) + (operation, min(ts, key=lambda x: x[0])[1][1]) # type: ignore ) else: tempdatalis = [] @@ -539,7 +539,7 @@ def merge_prices(a: list, b: list = None): self.path.change_prio(printstr, 0.2) break else: - prices = [] + prices: list = [] for d in tempdatalis: prices = merge_prices(d, prices) datacomb.extend(prices) @@ -549,8 +549,8 @@ def merge_prices(a: list, b: list = None): return datacomb def preload_price_data_path( - self, operations: list, coin: str, exchange: str = None - ): + self, operations: list, coin: str, exchange: str = "" + ) -> None: reference_coin = config.FIAT # get pairs used for calculating the price @@ -564,7 +564,7 @@ def preload_price_data_path( self.get_db_path(op.platform), tablename, op.utc_time ) ] - operations_grouped = {} + operations_grouped:dict = {} if operations_filtered: for i in operations_filtered: if i.coin == config.FIAT: From 
08f34020538a8b447d2334976aa604e8f4abe81e Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 17:22:46 +0200 Subject: [PATCH 16/53] flake compatible formatting (except E501) --- src/graph.py | 22 +++++++++++++--------- src/price_data.py | 17 ++++++++++------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/graph.py b/src/graph.py index 70a75946..ba45674c 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,7 +1,6 @@ -from datetime import datetime from time import sleep, time_ns -import ccxt #type: ignore +import ccxt # type: ignore class PricePath: @@ -10,7 +9,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority : dict= {} + self.priority: dict = {} allpairs = [] for exchange_id in exchanges: @@ -29,7 +28,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): ) allpairs = list(set(allpairs)) # print("Total Pairs to check:", len(allpairs)) - allpairs.sort(key=lambda x: x[3]) #type: ignore + allpairs.sort(key=lambda x: x[3]) for i in allpairs: base = i[0] quote = i[1] @@ -119,7 +118,8 @@ def comb_sort_key(path): if c[1][1]["stoptime"] == 0: break elif c[1][1]["avg_vol"] != 0: - # is very much off because volume is not in the same currency something for later + # is very much off because volume is not in the same + # currency something for later volumenew += c[1][1]["avg_vol"] else: @@ -164,7 +164,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): else: rangeinms = 0 # maybe throw error - # add one candle to the end to ensure the needed timeslot is in the requested candles + # add one candle to the end to ensure the needed + # timeslot is in the requested candles rangeincandles = int(rangeinms / timeframe) + 1 # todo: cache already used pairs @@ -175,7 +176,8 @@ def get_active_timeframe(path, starttimestamp=0, 
stoptimestamp=-1): if not cached: exchange_class = getattr(ccxt, path[i][1]["exchange"]) exchange = exchange_class() - # maybe a more elaborate ratelimit wich counts execution time to waiting + # TODO maybe a more elaborate ratelimit wich removes execution + # time to from the ratelimit sleep(exchange.rateLimit / 1000) timeframeexchange = exchange.timeframes.get("1w") if ( @@ -228,7 +230,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): # get timeframe in which a path is viable for path in paths: timest, newpath = get_active_timeframe(path) - # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. if the first + # this is implemented as a generator (hence the yield) to reduce + # the amount of computing needed. if the first path fails the next is used if starttime == 0 and stoptime == 0: yield timest, newpath elif starttime == 0: @@ -249,6 +252,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): to = "EUR" preferredexchange = "binance" path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange) - # debug only in actual use we would iterate over the path object fetching new paths as needed + # debug only in actual use we would iterate over + # the path object fetching new paths as needed path = list(path) print(len(path)) diff --git a/src/price_data.py b/src/price_data.py index 0b8dffd5..e65cc3dc 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -25,7 +25,7 @@ from time import sleep from typing import Any, Optional, Union -import ccxt #type: ignore +import ccxt # type: ignore import requests import config @@ -432,8 +432,9 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang)) else: log.error( - "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv" + "fetchOHLCV not implemented on exchange, skipping ohlcv" ) + # shouldnt happen technically 
because exchanges are filterd for fetchohlcv return [] def _get_bulk_pair_data_path( @@ -443,7 +444,7 @@ def _get_bulk_pair_data_path( reference_coin: str, preferredexchange: str = "binance", ) -> list: - def merge_prices(a: list, b: list = []) -> list: + def merge_prices(a: list, b: list = []) -> list: prices = [] if not b: return a @@ -461,7 +462,7 @@ def merge_prices(a: list, b: list = []) -> list: maxminutes = ( 300 # coinbasepro only allows a max of 300 minutes need a better solution ) - timestamps = (op.utc_time for op in operations) # type: ignore + timestamps = [op.utc_time for op in operations] if not preferredexchange: preferredexchange = "binance" @@ -510,7 +511,9 @@ def merge_prices(a: list, b: list = []) -> list: if tempdata: for operation in batch: - # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade) + # TODO discuss which candle is picked + # current is closest to original date + # (often off by about 1-20s, but can be after the Trade) # times do not always line up perfectly so take one nearest ts = list( map( @@ -525,7 +528,7 @@ def merge_prices(a: list, b: list = []) -> list: ) ) tempdatalis[i].append( - (operation, min(ts, key=lambda x: x[0])[1][1]) # type: ignore + (operation, min(ts, key=lambda x: x[0])[1][1]) ) else: tempdatalis = [] @@ -564,7 +567,7 @@ def preload_price_data_path( self.get_db_path(op.platform), tablename, op.utc_time ) ] - operations_grouped:dict = {} + operations_grouped: dict = {} if operations_filtered: for i in operations_filtered: if i.coin == config.FIAT: From 23f38b61a69082f3afe03909246d7cc7cd283db5 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 17:38:27 +0200 Subject: [PATCH 17/53] documentation --- src/graph.py | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/graph.py b/src/graph.py index ba45674c..4430cbe6 100644 --- a/src/graph.py 
+++ b/src/graph.py @@ -9,7 +9,8 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority: dict = {} + self.priority: dict[str,int] = {} + #saves the priority for a certain path so that bad paths can be skipped allpairs = [] for exchange_id in exchanges: @@ -26,9 +27,10 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): print( f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs." ) - allpairs = list(set(allpairs)) + allpairs = list(set(allpairs)) # fast an easy deduplication # print("Total Pairs to check:", len(allpairs)) - allpairs.sort(key=lambda x: x[3]) + allpairs.sort(key=lambda x: x[3]) + #sorting by symbol for pair to have the same result on every run due to the set for i in allpairs: base = i[0] quote = i[1] @@ -69,6 +71,9 @@ def addEdge(self, vrtx1, vrtx2, data): self.gdict[vrtx1] = [vrtx2] def _getpath(self, start, stop, maxdepth, depth=0): + """ + a recursive function for finding all possible paths between to edges + """ paths = [] if (edges := self.gdict.get(start)) and maxdepth > depth: for edge in edges: @@ -103,6 +108,14 @@ def getpath( self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3 ): def comb_sort_key(path): + """ + Sorting funtction which is used to prioritze paths by (in order of magnitude): + - smallest length -> +1 per element + - preferred exchange -> +1 per exchange which is not preferred + - priority -> +0.5 per unfinished execution of path + - volume (if known) -> 1/sum(avg_vol per pair) + - volume (if not known) -> 1 -> always smaller if volume is known + """ if preferredexchange: # prioritze pairs with the preferred exchange volume = 1 @@ -141,7 +154,10 @@ def comb_sort_key(path): return len(path) def check_cache(pair): - + """ + checking if the start and stoptime of a pair is already known + or if it needs to be downloaded + 
""" if pair[1].get("starttime") or pair[1].get("stoptime"): return True, pair if cacheres := self.cache.get(pair[1]["exchange"] + pair[1]["symbol"]): @@ -240,7 +256,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): elif stoptime == 0: if starttime > timest[0]: yield timest, newpath - + # The most ideal situation is if the timerange of the path is known + # and larger than the needed timerange else: if stoptime < timest[1] and starttime > timest[0]: yield timest, newpath From 3939e1599e7d057a777c3082726cf6d6d77e0eb7 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Thu, 8 Apr 2021 18:07:13 +0200 Subject: [PATCH 18/53] small formatting --- src/graph.py | 16 ++++++++-------- src/price_data.py | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/graph.py b/src/graph.py index 4430cbe6..c1f3aea3 100644 --- a/src/graph.py +++ b/src/graph.py @@ -9,8 +9,8 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): exchanges = ["binance", "coinbasepro"] self.gdict = gdict self.cache = cache - self.priority: dict[str,int] = {} - #saves the priority for a certain path so that bad paths can be skipped + self.priority: dict[str, int] = {} + # saves the priority for a certain path so that bad paths can be skipped allpairs = [] for exchange_id in exchanges: @@ -27,10 +27,10 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): print( f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs." 
) - allpairs = list(set(allpairs)) # fast an easy deduplication + allpairs = list(set(allpairs)) # fast an easy deduplication # print("Total Pairs to check:", len(allpairs)) - allpairs.sort(key=lambda x: x[3]) - #sorting by symbol for pair to have the same result on every run due to the set + allpairs.sort(key=lambda x: x[3]) + # sorting by symbol for pair to have the same result on every run due to the set for i in allpairs: base = i[0] quote = i[1] @@ -113,8 +113,8 @@ def comb_sort_key(path): - smallest length -> +1 per element - preferred exchange -> +1 per exchange which is not preferred - priority -> +0.5 per unfinished execution of path - - volume (if known) -> 1/sum(avg_vol per pair) - - volume (if not known) -> 1 -> always smaller if volume is known + - volume (if known) -> 1/sum(avg_vol per pair) + - volume (if not known) -> 1 -> always smaller if volume is known """ if preferredexchange: # prioritze pairs with the preferred exchange @@ -256,7 +256,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): elif stoptime == 0: if starttime > timest[0]: yield timest, newpath - # The most ideal situation is if the timerange of the path is known + # The most ideal situation is if the timerange of the path is known # and larger than the needed timerange else: if stoptime < timest[1] and starttime > timest[0]: diff --git a/src/price_data.py b/src/price_data.py index e65cc3dc..dbdf0b39 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -431,9 +431,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list rang = max(int((stop - start) / 1000 / 60) + 2, 1) return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang)) else: - log.error( - "fetchOHLCV not implemented on exchange, skipping ohlcv" - ) + log.error("fetchOHLCV not implemented on exchange, skipping ohlcv") # shouldnt happen technically because exchanges are filterd for fetchohlcv return [] From 65cbfbfb6c7d5c798ffd7f96444dce9aba19945f Mon Sep 17 
00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 19:22:59 +0200 Subject: [PATCH 19/53] RM `# type: ignore` --- src/graph.py | 2 +- src/price_data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/graph.py b/src/graph.py index c1f3aea3..6215e435 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,6 +1,6 @@ from time import sleep, time_ns -import ccxt # type: ignore +import ccxt class PricePath: diff --git a/src/price_data.py b/src/price_data.py index dbdf0b39..ab4ed610 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -25,7 +25,7 @@ from time import sleep from typing import Any, Optional, Union -import ccxt # type: ignore +import ccxt import requests import config From 888dba81cab5d7f2713769dd9205fd7fd6279d68 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 19:40:49 +0200 Subject: [PATCH 20/53] Use logging instead of print --- src/graph.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/graph.py b/src/graph.py index 6215e435..65843fdc 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,7 +1,10 @@ +import logging from time import sleep, time_ns import ccxt +log = logging.getLogger(__name__) + class PricePath: def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): @@ -24,7 +27,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets] ) else: - print( + logging.warning( f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs." 
) allpairs = list(set(allpairs)) # fast an easy deduplication From 5d4f394580607da998a9c66248e5c9d5df12e576 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 19:49:03 +0200 Subject: [PATCH 21/53] Order requirements-dev alphabetically --- requirements-dev.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements-dev.txt b/requirements-dev.txt index 8a333263..4cae901f 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,8 +4,8 @@ click==7.1.2 flake8==3.8.4 isort==5.7.0 mccabe==0.6.1 -mypy-extensions==0.4.3 mypy==0.812 +mypy-extensions==0.4.3 pathspec==0.8.1 pycodestyle==2.6.0 pyflakes==2.2.0 From be5ab791cd1d5bbef717a0efa90833e8c5228d1f Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 19:49:15 +0200 Subject: [PATCH 22/53] ADD all required modules explicitly --- requirements.txt | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/requirements.txt b/requirements.txt index 542c7cb2..af2835fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,18 @@ +aiodns==2.0.0 +aiohttp==3.7.4.post0 +async-timeout==3.0.1 +attrs==20.3.0 ccxt==1.42.47 certifi==2020.12.5 +cffi==1.14.5 chardet==4.0.0 +cryptography==3.4.7 idna==2.10 +multidict==5.1.0 +pycares==3.1.1 +pycparser==2.20 python-dateutil==2.8.1 requests==2.25.1 six==1.15.0 urllib3==1.26.4 +yarl==1.1.0 From 8928d6d694d66ff727b4d2dfe957b07f6759a1b8 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 20:08:01 +0200 Subject: [PATCH 23/53] Use explicit import --- src/graph.py | 6 +++--- src/price_data.py | 7 +++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/graph.py b/src/graph.py index 65843fdc..93874394 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,5 +1,5 @@ import logging -from time import sleep, time_ns +import time import ccxt @@ -176,7 +176,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if starttimestamp == 0: starttimestamp = 1325372400 * 1000 if stoptimestamp == -1: - stoptimestamp = 
time_ns() // 1_000_000 # get cur time in ms + stoptimestamp = time.time_ns() // 1_000_000 # get cur time in ms starttimestamp -= timeframe # to handle edge cases if stoptimestamp > starttimestamp: rangeinms = stoptimestamp - starttimestamp @@ -197,7 +197,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): exchange = exchange_class() # TODO maybe a more elaborate ratelimit wich removes execution # time to from the ratelimit - sleep(exchange.rateLimit / 1000) + time.sleep(exchange.rateLimit / 1000) timeframeexchange = exchange.timeframes.get("1w") if ( timeframeexchange diff --git a/src/price_data.py b/src/price_data.py index ab4ed610..9f4a430b 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -22,17 +22,16 @@ import sqlite3 import time from pathlib import Path -from time import sleep from typing import Any, Optional, Union import ccxt import requests import config +import graph import misc import transaction from core import kraken_pair_map -from graph import PricePath log = logging.getLogger(__name__) @@ -45,7 +44,7 @@ class PriceData: def __init__(self): - self.path = PricePath() + self.path = graph.PricePath() def get_db_path(self, platform: str) -> Path: return Path(config.DATA_PATH, f"{platform}.db") @@ -425,7 +424,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list exchange_class = getattr(ccxt, exchange) exchange_obj = exchange_class() if exchange_obj.has["fetchOHLCV"]: - sleep(exchange_obj.rateLimit / 1000) # time.sleep wants seconds + time.sleep(exchange_obj.rateLimit / 1000) # time.sleep wants seconds # get 2min before and after range startval = start - 1000 * 60 * 2 rang = max(int((stop - start) / 1000 / 60) + 2, 1) From b2f207478b5440768edf9da555f21ff09b9d304d Mon Sep 17 00:00:00 2001 From: Jeppy Date: Thu, 8 Apr 2021 20:09:15 +0200 Subject: [PATCH 24/53] FIX remove false *1000 --- src/price_data.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/price_data.py 
b/src/price_data.py index 9f4a430b..26012dff 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -515,10 +515,7 @@ def merge_prices(a: list, b: list = []) -> list: ts = list( map( lambda x: ( - abs( - misc.to_ms_timestamp(operation) * 1000 - - x[0] - ), + abs(misc.to_ms_timestamp(operation) - x[0]), x, ), tempdata, From 700930afeac547ad9a2cbe57c4ff82278c7a4c07 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Fri, 9 Apr 2021 20:06:09 +0200 Subject: [PATCH 25/53] REFACTOR PriceData.get_candles --- src/price_data.py | 51 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 26012dff..fd78ec0c 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -19,6 +19,7 @@ import decimal import json import logging +import math import sqlite3 import time from pathlib import Path @@ -420,19 +421,43 @@ def get_cost( return price * tr.sold raise NotImplementedError - def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list: - exchange_class = getattr(ccxt, exchange) - exchange_obj = exchange_class() - if exchange_obj.has["fetchOHLCV"]: - time.sleep(exchange_obj.rateLimit / 1000) # time.sleep wants seconds - # get 2min before and after range - startval = start - 1000 * 60 * 2 - rang = max(int((stop - start) / 1000 / 60) + 2, 1) - return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang)) - else: - log.error("fetchOHLCV not implemented on exchange, skipping ohlcv") - # shouldnt happen technically because exchanges are filterd for fetchohlcv - return [] + def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> list: + """Return list with candles starting 2 minutes before start. + + Args: + start (int): Start time in milliseconds since epoch. + stop (int): End time in milliseconds. + symbol (str) + exchange_id (str) + + Returns: + list: List of OHLCV candles gathered from ccxt. 
+ """ + assert stop >= start, f"`stop` must be after `start` {stop} !>= {start}." + + exchange_class = getattr(ccxt, exchange_id) + exchange = exchange_class() + assert isinstance(exchange, ccxt.Exchange) + + # Technically impossible. Unsupported exchanges should be detected earlier. + assert exchange.has["fetchOHLCV"] + + # time.sleep wants seconds + time.sleep(exchange.rateLimit / 1000) + + # Get candles 2 min before and after start/stop. + since = start - 2 * 60 * 1000 + # `fetch_ohlcv` has no stop value but only a limit (amount of candles fetched). + # Calculate the amount of candles in the 1 min timeframe, + # so that we get enough candles. + # BUG Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300). + # We should throw a warning and make sure that `limit` is below their + # supported maximum. + limit = math.ceil((stop - start) / (1000 * 60)) + 2 + + candles = exchange.fetch_ohlcv(symbol, "1m", since, limit) + assert isinstance(candles, list) + return candles def _get_bulk_pair_data_path( self, From 3c036bb93d114a3c86b378530bdbe4f015a257a0 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Fri, 9 Apr 2021 21:05:18 +0200 Subject: [PATCH 26/53] ADD flake8-bugbear to show additional warnings --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index 4cae901f..15fe5aa7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -2,6 +2,7 @@ appdirs==1.4.4 black==20.8b1 click==7.1.2 flake8==3.8.4 +flake8-bugbear==21.4.3 isort==5.7.0 mccabe==0.6.1 mypy==0.812 From 644c2a989229c90143bf7aa14466a8f15f687591 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 10:50:03 +0200 Subject: [PATCH 27/53] Adjust bug message, ccxt raises error when ohlc limit is exceeded --- src/price_data.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index fd78ec0c..60873df6 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -450,9 
+450,8 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> l # `fetch_ohlcv` has no stop value but only a limit (amount of candles fetched). # Calculate the amount of candles in the 1 min timeframe, # so that we get enough candles. - # BUG Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300). - # We should throw a warning and make sure that `limit` is below their - # supported maximum. + # Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300). + # `ccxt` throws an error if we exceed this limit. limit = math.ceil((stop - start) / (1000 * 60)) + 2 candles = exchange.fetch_ohlcv(symbol, "1m", since, limit) From d28babdaba548a7d22f6cb335e58e6beeead185e Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 12:48:03 +0200 Subject: [PATCH 28/53] REFACTOR PriceData.`preload_price_data_path` - Rename to `preload_prices` - ADD `get_missing_price_operations` using one query to find all missing prices - ADD TODO to use bulk insert --- src/price_data.py | 133 +++++++++++++++++++++++++++++++++++----------- src/taxman.py | 6 ++- 2 files changed, 107 insertions(+), 32 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 60873df6..035c6baa 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -408,6 +408,70 @@ def get_price( self.__set_price_db(db_path, tablename, utc_time, price) return price + def get_missing_price_operations( + self, + operations: list[transaction.Operation], + coin: str, + platform: str, + reference_coin: str = config.FIAT, + ) -> list[transaction.Operation]: + """Return operations for which no price was found in the database. + + Requires the `operations` to have the same `coin` and `platform`. + + Args: + operations (list[transaction.Operation]) + coin (str) + platform (str) + reference_coin (str): Defaults to `config.FIAT`. 
+ + Returns: + list[transaction.Operation] + """ + assert all(op.coin == coin for op in operations) + assert all(op.platform == platform for op in operations) + + # We do not have to calculate the price, if there are no operations or the + # coin is the same as the reference coin. + if not operations or coin == reference_coin: + return [] + + db_path = self.get_db_path(platform) + # If the price database does not exist, we need to query all prices. + if not db_path.is_file(): + return operations + + tablename = self.get_tablename(coin, reference_coin) + utc_time_values = ",".join(f"('{op.utc_time}')" for op in operations) + + with sqlite3.connect(db_path) as conn: + cur = conn.cursor() + # The query returns a list with 0 and 1's. + # - 0: a price exists. + # - 1: the price is missing. + query = ( + "SELECT t.utc_time IS NULL " + f"FROM (VALUES {utc_time_values}) " + f"LEFT JOIN `{tablename}` t ON t.utc_time = COLUMN1;" + ) + + # Execute the query. + try: + cur.execute(query) + except sqlite3.OperationalError as e: + if str(e) == f"no such table: {tablename}": + # The corresponding price table does not exist yet. + # We need to query all prices. + return operations + raise e + + # Evaluate the result. + result = (bool(is_missing) for is_missing, in cur.fetchall()) + missing_prices_operations = [ + op for op, is_missing in zip(operations, result) if is_missing + ] + return missing_prices_operations + def get_cost( self, tr: Union[transaction.Operation, transaction.SoldCoin], @@ -569,37 +633,44 @@ def merge_prices(a: list, b: list = []) -> list: return datacomb - def preload_price_data_path( - self, operations: list, coin: str, exchange: str = "" + def preload_prices( + self, + operations: list[transaction.Operation], + coin: str, + platform: str, + reference_coin: str = config.FIAT, ) -> None: + """Preload price data. 
- reference_coin = config.FIAT - # get pairs used for calculating the price - operations_filtered = [] + Requires the operations to have the same `coin` and `exchange`. - tablename = self.get_tablename(coin, reference_coin) - operations_filtered = [ - op - for op in operations - if not self.__get_price_db( - self.get_db_path(op.platform), tablename, op.utc_time - ) - ] - operations_grouped: dict = {} - if operations_filtered: - for i in operations_filtered: - if i.coin == config.FIAT: - pass - elif operations_grouped.get(i.platform): - operations_grouped[i.platform].append(i) - else: - operations_grouped[i.platform] = [i] - for platf in operations_grouped.keys(): - data = self._get_bulk_pair_data_path( - operations_grouped[platf], - coin, - reference_coin, - preferredexchange=platf, - ) - for p in data: - self.set_price_db(platf, coin, reference_coin, p[0], p[1]) + Args: + operations (list[transaction.Operation]) + coin (str) + platform (str) + reference_coin (str): Defaults to `config.FIAT`. + """ + assert all(op.coin == coin for op in operations) + assert all(op.platform == platform for op in operations) + + # We do not have to preload prices, if there are no operations or the coin is + # the same as the reference coin. + if not operations or coin == reference_coin: + return + + # Only consider the operations for which we have no prices in the database. + missing_prices_operations = self.get_missing_price_operations( + operations, coin, platform, reference_coin + ) + + # Preload the prices. + data = self._get_bulk_pair_data_path( + missing_prices_operations, + coin, + reference_coin, + preferredexchange=platform, + ) + + # TODO Use bulk insert to write all prices at once into the database. 
+ for p in data: + self.set_price_db(platform, coin, reference_coin, p[0], p[1]) diff --git a/src/taxman.py b/src/taxman.py index 1df6a84e..8531ddce 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -178,7 +178,11 @@ def evaluate_taxation(self) -> None: log.debug("Starting evaluation...") for coin, operations in misc.group_by(self.book.operations, "coin").items(): operations = sorted(operations, key=lambda op: op.utc_time) - self.price_data.preload_price_data_path(operations, coin) + + # Preload prices per exchange. + for platform, _operations in misc.group_by(operations, "platform").items(): + self.price_data.preload_prices(_operations, coin, platform) + self.__evaluate_taxation(coin, operations) def print_evaluation(self) -> None: From 2d31fc14969c22ee5fbb549b5cd5713f08b8e923 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 13:06:51 +0200 Subject: [PATCH 29/53] FIX `force_decimal` should raise ValueError instead of KeyError --- src/misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/misc.py b/src/misc.py index a8e6dbc3..c938b43a 100644 --- a/src/misc.py +++ b/src/misc.py @@ -75,7 +75,7 @@ def force_decimal(x: Union[str, int, float]) -> decimal.Decimal: x (Union[None, str, int, float]) Raises: - KeyError: The given argument can not be parsed accordingly. + ValueError: The given argument can not be parsed accordingly. 
Returns: decimal.Decimal @@ -84,7 +84,7 @@ def force_decimal(x: Union[str, int, float]) -> decimal.Decimal: if isinstance(d, decimal.Decimal): return d else: - raise KeyError(f"Could not parse `{d}` to decimal") + raise ValueError(f"Could not parse `{d}` to decimal") def reciprocal(d: decimal.Decimal) -> decimal.Decimal: From 6acd035b0c2a9e8f96c18e19b6f3e8564897b0d3 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 13:20:17 +0200 Subject: [PATCH 30/53] ADD `get_avg_candle_prices` --- src/price_data.py | 51 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 41 insertions(+), 10 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 035c6baa..e11be918 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -522,6 +522,44 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> l assert isinstance(candles, list) return candles + def get_avg_candle_prices( + self, start: int, stop: int, symbol: str, exchange_id: str, invert: bool = False + ) -> list[tuple[int, decimal.Decimal]]: + """Return average price from ohlcv candles. + + The average price of the candle is calculated as the avergae from the + open and close price. + + Further information about candle-function can be found in `get_candles`. + + Args: + start (int) + stop (int) + symbol (str) + exchange_id (str) + invert (bool, optional): Defaults to False. + + Returns: + list: Timestamp and average prices of candles containing: + + timestamp (int): Timestamp of candle in milliseconds since epoch. + avg_price (decimal.Decimal): Average price of candle. 
+ """ + avg_candle_prices = [] + candle_prices = self.get_candles(start, stop, symbol, exchange_id) + + for timestamp_ms, _open, _high, _low, _close, _volume in candle_prices: + open = misc.force_decimal(_open) + close = misc.force_decimal(_close) + + avg_price = (open + close) / 2 + + if invert and avg_price != 0: + avg_price = 1 / avg_price + + avg_candle_prices.append((timestamp_ms, avg_price)) + return avg_candle_prices + def _get_bulk_pair_data_path( self, operations: list, @@ -584,17 +622,10 @@ def merge_prices(a: list, b: list = []) -> list: symbol = p[1][i][1]["symbol"] exchange = p[1][i][1]["exchange"] invert = p[1][i][1]["inverted"] - candles = self.get_candles(first, last, symbol, exchange) - if invert: - tempdata = list( - map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles) - ) - else: - tempdata = list( - map(lambda x: (x[0], (x[1] + x[4]) / 2), candles) - ) - if tempdata: + if tempdata := self.get_avg_candle_prices( + first, last, symbol, exchange, invert + ): for operation in batch: # TODO discuss which candle is picked # current is closest to original date From 6dcb6cebd65466fc2bed9704bf31b8ad005746c6 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 13:20:51 +0200 Subject: [PATCH 31/53] UPDATE `get_candles` docstring --- src/price_data.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index e11be918..e25e2688 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -485,17 +485,26 @@ def get_cost( return price * tr.sold raise NotImplementedError - def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> list: + def get_candles( + self, start: int, stop: int, symbol: str, exchange_id: str + ) -> list[tuple[int, float, float, float, float, float]]: """Return list with candles starting 2 minutes before start. Args: start (int): Start time in milliseconds since epoch. - stop (int): End time in milliseconds. 
+ stop (int): End time in milliseconds since epoch. symbol (str) exchange_id (str) Returns: - list: List of OHLCV candles gathered from ccxt. + list: List of OHLCV candles gathered from ccxt containing: + + timestamp (int): Timestamp of candle in milliseconds since epoch. + open_price (float) + lowest_price (float) + highest_price (float) + close_price (float) + volume (float) """ assert stop >= start, f"`stop` must be after `start` {stop} !>= {start}." From 591c7494ed0e5153df76496c016ded19d069b37e Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 13:53:30 +0200 Subject: [PATCH 32/53] FIX ignore missing import of `ccxt` module `ccxt` does not provide type hints --- setup.cfg | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.cfg b/setup.cfg index 10a17170..3ae0206f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -9,6 +9,9 @@ warn_return_any = True show_error_codes = True warn_unused_configs = True +[mypy-ccxt.*] +ignore_missing_imports = True + [flake8] exclude = *py*env*/ max_line_length = 88 From 522644746d444a3ed919a76aa777ed9b0a369515 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sat, 10 Apr 2021 20:39:13 +0200 Subject: [PATCH 33/53] FIX mypy/flake8 errors and some refactoring graph --- src/graph.py | 82 ++++++++++++++++++++++++++--------------------- src/price_data.py | 12 +++---- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/src/graph.py b/src/graph.py index 93874394..0e4cfdac 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,5 +1,7 @@ +import collections import logging import time +from typing import Optional import ccxt @@ -7,43 +9,59 @@ class PricePath: - def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}): - if not exchanges: - exchanges = ["binance", "coinbasepro"] + def __init__( + self, + exchanges: Optional[list[str]] = None, + gdict: Optional[dict] = None, + cache: Optional[dict] = None, + ): + if exchanges is None: + exchanges = [] + if gdict is None: + gdict = {} + if cache is None: + cache = {} 
+ self.gdict = gdict self.cache = cache - self.priority: dict[str, int] = {} - # saves the priority for a certain path so that bad paths can be skipped - allpairs = [] + + # Saves the priority for a certain path so that bad paths can be skipped. + self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) + allpairs: list[tuple[str, str, str, str]] = [] for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) exchange = exchange_class() - markets = [] markets = exchange.fetch_markets() - if exchange.has["fetchOHLCV"]: + assert isinstance(markets, list) + if exchange.has["fetchOHLCV"]: allpairs.extend( [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets] ) else: logging.warning( - f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs." + f"{exchange.name} does not support fetch ohlcv. " + f"Ignoring exchange and {len(markets)} pairs." ) - allpairs = list(set(allpairs)) # fast an easy deduplication + + # Remove duplicate pairs. + # TODO It might be faster to create it directly as set. + # Is it even necessary to convert it to a list? + allpairs = list(set(allpairs)) # print("Total Pairs to check:", len(allpairs)) + + # Sorting by `symbol` to have the same result on every run due to the set. 
allpairs.sort(key=lambda x: x[3]) - # sorting by symbol for pair to have the same result on every run due to the set - for i in allpairs: - base = i[0] - quote = i[1] + + for base, quote, exchange, symbol in allpairs: self.addVertex(base) self.addVertex(quote) self.addEdge( - base, quote, {"exchange": i[2], "symbol": i[3], "inverted": False} + base, quote, {"exchange": exchange, "symbol": symbol, "inverted": False} ) self.addEdge( - quote, base, {"exchange": i[2], "symbol": i[3], "inverted": True} + quote, base, {"exchange": exchange, "symbol": symbol, "inverted": True} ) def edges(self): @@ -81,11 +99,7 @@ def _getpath(self, start, stop, maxdepth, depth=0): if (edges := self.gdict.get(start)) and maxdepth > depth: for edge in edges: if depth == 0 and edge[0] == stop: - paths.append( - [ - edge, - ] - ) + paths.append([edge]) elif edge[0] == stop: paths.append(edge) else: @@ -93,26 +107,22 @@ def _getpath(self, start, stop, maxdepth, depth=0): if len(path) and path is not None: for p in path: if p[0] == stop: - newpath = [ - edge, - ] + newpath = [edge] newpath.append(p) paths.append(newpath) return paths def change_prio(self, key, value): ke = "-".join(key) - if self.priority.get(ke): - self.priority[ke] += value - else: - self.priority[ke] = value + self.priority[ke] += value def getpath( self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3 ): def comb_sort_key(path): """ - Sorting funtction which is used to prioritze paths by (in order of magnitude): + Sorting function which is used to prioritize paths by: + (in order of magnitude) - smallest length -> +1 per element - preferred exchange -> +1 per exchange which is not preferred - priority -> +0.5 per unfinished execution of path @@ -123,13 +133,11 @@ def comb_sort_key(path): # prioritze pairs with the preferred exchange volume = 1 volumenew = 0 - if not ( - priority := self.priority.get( - "-".join([a[1]["symbol"] for a in path]) - ) - ): - priority = 0 - for c in [a if (a := 
check_cache(pair)) else None for pair in path]: + priority = self.priority.get( + "-".join([a[1]["symbol"] for a in path]), 0 + ) + xl = (a if (a := check_cache(pair)) else None for pair in path) + for c in xl: if c and c[0]: if c[1][1]["stoptime"] == 0: break @@ -267,7 +275,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if __name__ == "__main__": - g = PricePath() + g = PricePath(exchanges=["binance", "coinbasepro"]) start = "IOTA" to = "EUR" preferredexchange = "binance" diff --git a/src/price_data.py b/src/price_data.py index e25e2688..f7b55f47 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -576,17 +576,15 @@ def _get_bulk_pair_data_path( reference_coin: str, preferredexchange: str = "binance", ) -> list: - def merge_prices(a: list, b: list = []) -> list: - prices = [] + def merge_prices(a: list, b: Optional[list] = None) -> list: if not b: return a + + prices = [] for i in a: - factor = None - for j in b: - if i[0] == j[0]: - factor = j[1] - break + factor = next(j[1] for j in b if i[0] == j[0]) prices.append((i[0], i[1] * factor)) + return prices timestamps: list = [] From 35536ae35f43b2af50a37164165f68c760078c9f Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sun, 11 Apr 2021 14:36:32 +0200 Subject: [PATCH 34/53] ADD make venv and some comments in makefile --- Makefile | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index a2927f0c..72bfa664 100644 --- a/Makefile +++ b/Makefile @@ -9,6 +9,7 @@ mypy: check-isort: isort . 
--check-only --diff +# Run linter lint: flake8 mypy check-isort isort: @@ -17,13 +18,21 @@ isort: black: black src +# Run formatter format: black isort +# Run the project +run: + python src/main.py + +# Install requirements install: python -m pip install --upgrade pip pip install -r requirements.txt -r requirements-dev.txt -run: - python src/main.py +# Setup virtuel environment +venv: + python -m venv .pyenv + .pyenv\Scripts\activate && make install -.PHONY: flake8 mypy check-isort lint isort black format install run +.PHONY: flake8 mypy check-isort lint isort black format run install venv From 2e9b866a5d30966d26e65a11b858544b3e95e9bd Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sun, 11 Apr 2021 17:06:31 +0200 Subject: [PATCH 35/53] REFACTOR Getting time batches from operations... in _get_bulk_pair_data_path - ADD transaction.time_batches --- src/price_data.py | 28 +++++++--------------- src/transaction.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 20 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index f7b55f47..4452e1a4 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -587,28 +587,16 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: return prices - timestamps: list = [] - timestamppairs: list = [] - maxminutes = ( - 300 # coinbasepro only allows a max of 300 minutes need a better solution + # TODO Set `max_difference` to the platform specific ohlcv-limit. + max_difference = 300 # coinbasepro + # TODO Set `max_size` to the platform specific ohlcv-limit. 
+ max_size = 300 # coinbasepro + time_batches = transaction.time_batches( + operations, max_difference=max_difference, max_size=max_size ) - timestamps = [op.utc_time for op in operations] - if not preferredexchange: - preferredexchange = "binance" - - current_first = None - for timestamp in timestamps: - if ( - current_first - and current_first + datetime.timedelta(minutes=maxminutes - 4) - > timestamp - ): - timestamppairs[-1].append(timestamp) - else: - current_first = timestamp - timestamppairs.append([timestamp]) + datacomb = [] - for batch in timestamppairs: + for batch in time_batches: # ccxt works with timestamps in milliseconds first = misc.to_ms_timestamp(batch[0]) last = misc.to_ms_timestamp(batch[-1]) diff --git a/src/transaction.py b/src/transaction.py index 7bdcce32..6eebdd85 100644 --- a/src/transaction.py +++ b/src/transaction.py @@ -130,3 +130,62 @@ class TaxEvent: taxed_gain: decimal.Decimal op: Operation remark: str = "" + + +# Functions + + +def time_batches( + operations: list[Operation], + max_difference: typing.Optional[int], + max_size: typing.Optional[int] = None, +) -> typing.Iterable[list[datetime.datetime]]: + """Return timestamps of operations in batches. + + The batches are clustered such that the batches time difference + from first to last operation is lesser than `max_difference` minutes and the + batches have a maximum size of `max_size`. + + TODO Solve the clustering optimally. (It's already optimal, if max_size is None.) + + Args: + operations (list[Operation]): List of operations. + max_difference (Optional[int], optional): + Maximal time difference in batch (in minutes). + Defaults to None (unlimited time difference). + limax_sizemit (Optional[int], optional): + Maximum size of batch. + Defaults to None (unlimited size). + + Yields: + Generator[None, list[datetime.datetime], None]: Yield the timestamp clusters. 
+ """ + assert max_difference is None or max_difference >= 0 + assert max_size is None or max_size > 0 + + batch: list[datetime.datetime] = [] + + if not operations: + # Nothing to cluster, return empty list. + return batch + + # Calculate the latest time which is allowed to be in this cluster. + if max_difference: + max_time = operations[0].utc_time + datetime.timedelta(minutes=max_difference) + else: + max_time = datetime.datetime.max + + for op in operations: + timestamp = op.utc_time + + # Check if timestamp is before max_time and + # that our cluster isn't to large already. + if timestamp < max_time and (not max_size or len(batch) < max_size): + batch.append(timestamp) + else: + yield batch + + batch = [timestamp] + + if max_difference: + max_time = timestamp + datetime.timedelta(minutes=max_difference) From 842f7319afc501096d39f8af55af6b3473a5b783 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sun, 11 Apr 2021 17:10:19 +0200 Subject: [PATCH 36/53] ADD TODO: preferredexchange default only for debug purposes --- src/price_data.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/price_data.py b/src/price_data.py index 4452e1a4..1e0f1766 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -569,6 +569,8 @@ def get_avg_candle_prices( avg_candle_prices.append((timestamp_ms, avg_price)) return avg_candle_prices + # TODO preferredexchange default is only for debug purposes and should be + # removed later on. 
def _get_bulk_pair_data_path( self, operations: list, From cac907c8fde52d269d5fe527b0cd0680637a6211 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Tue, 27 Apr 2021 19:36:45 +0200 Subject: [PATCH 37/53] refractored path sorting rename variables and function to underscored variant renamed some bad variables --- Makefile | 2 +- src/graph.py | 72 ++++++++++++++++++++++++++-------------------------- 2 files changed, 37 insertions(+), 37 deletions(-) diff --git a/Makefile b/Makefile index 72bfa664..aad7d66c 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ install: python -m pip install --upgrade pip pip install -r requirements.txt -r requirements-dev.txt -# Setup virtuel environment +# Setup virtual environment venv: python -m venv .pyenv .pyenv\Scripts\activate && make install diff --git a/src/graph.py b/src/graph.py index 0e4cfdac..84c4e462 100644 --- a/src/graph.py +++ b/src/graph.py @@ -55,21 +55,21 @@ def __init__( allpairs.sort(key=lambda x: x[3]) for base, quote, exchange, symbol in allpairs: - self.addVertex(base) - self.addVertex(quote) - self.addEdge( + self.add_Vertex(base) + self.add_Vertex(quote) + self.add_Edge( base, quote, {"exchange": exchange, "symbol": symbol, "inverted": False} ) - self.addEdge( + self.add_Edge( quote, base, {"exchange": exchange, "symbol": symbol, "inverted": True} ) def edges(self): - return self.findedges() + return self.find_edges() # Find the distinct list of edges - def findedges(self): + def find_edges(self): edgename = [] for vrtx in self.gdict: for nxtvrtx in self.gdict[vrtx]: @@ -77,21 +77,21 @@ def findedges(self): edgename.append({vrtx, nxtvrtx}) return edgename - def getVertices(self): + def get_Vertices(self): return list(self.gdict.keys()) # Add the vertex as a key - def addVertex(self, vrtx): + def add_Vertex(self, vrtx): if vrtx not in self.gdict: self.gdict[vrtx] = [] - def addEdge(self, vrtx1, vrtx2, data): + def add_Edge(self, vrtx1, vrtx2, data): if vrtx1 in 
self.gdict: self.gdict[vrtx1].append((vrtx2, data)) else: self.gdict[vrtx1] = [vrtx2] - def _getpath(self, start, stop, maxdepth, depth=0): + def _get_path(self, start, stop, maxdepth, depth=0): """ a recursive function for finding all possible paths between to edges """ @@ -103,7 +103,7 @@ def _getpath(self, start, stop, maxdepth, depth=0): elif edge[0] == stop: paths.append(edge) else: - path = self._getpath(edge[0], stop, maxdepth, depth=depth + 1) + path = self._get_path(edge[0], stop, maxdepth, depth=depth + 1) if len(path) and path is not None: for p in path: if p[0] == stop: @@ -116,7 +116,7 @@ def change_prio(self, key, value): ke = "-".join(key) self.priority[ke] += value - def getpath( + def get_path( self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3 ): def comb_sort_key(path): @@ -129,27 +129,28 @@ def comb_sort_key(path): - volume (if known) -> 1/sum(avg_vol per pair) - volume (if not known) -> 1 -> always smaller if volume is known """ - if preferredexchange: - # prioritze pairs with the preferred exchange - volume = 1 - volumenew = 0 - priority = self.priority.get( - "-".join([a[1]["symbol"] for a in path]), 0 - ) - xl = (a if (a := check_cache(pair)) else None for pair in path) - for c in xl: - if c and c[0]: - if c[1][1]["stoptime"] == 0: - break - elif c[1][1]["avg_vol"] != 0: - # is very much off because volume is not in the same - # currency something for later - volumenew += c[1][1]["avg_vol"] - - else: + # prioritze pairs with the preferred exchange + volume = 1 + volumenew = 0 + priority = self.priority.get("-".join([a[1]["symbol"] for a in path]), 0) + pathlis = (a if (a := check_cache(pair)) else None for pair in path) + for possiblepath in pathlis: + if possiblepath and possiblepath[0]: + if possiblepath[1][1]["stoptime"] == 0: break + elif possiblepath[1][1]["avg_vol"] != 0: + # is very much off because volume is not in the same + # currency something for later + volumenew += possiblepath[1][1]["avg_vol"] + 
else: - volume = 1 / volumenew + break + else: + volume = 1 / volumenew + temppriority = volume + priority + + if preferredexchange: + return ( len(path) + sum( @@ -158,11 +159,10 @@ def comb_sort_key(path): for pair in path ] ) - + volume - + priority + + temppriority ) else: - return len(path) + return len(path) + temppriority def check_cache(pair): """ @@ -251,7 +251,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): return (globalstarttime, globalstoptime), path # get all possible paths which are no longer than 4 pairs long - paths = self._getpath(start, stop, maxdepth) + paths = self._get_path(start, stop, maxdepth) # sort by path length to get minimal conversion chain to reduce error paths = sorted(paths, key=comb_sort_key) # get timeframe in which a path is viable @@ -279,7 +279,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): start = "IOTA" to = "EUR" preferredexchange = "binance" - path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange) + path = g.get_path(start, to, maxdepth=2, preferredexchange=preferredexchange) # debug only in actual use we would iterate over # the path object fetching new paths as needed path = list(path) From dd8fb3e8258cbbb87a77e63c54792689ddc2cd5d Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Tue, 27 Apr 2021 22:12:02 +0200 Subject: [PATCH 38/53] better ratelimiting and exchanges are set via config --- src/config.py | 1 + src/graph.py | 34 ++++++++++++++++++++++++++-------- src/price_data.py | 4 ++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/src/config.py b/src/config.py index bba8ced3..6ef53f08 100644 --- a/src/config.py +++ b/src/config.py @@ -43,3 +43,4 @@ def IS_LONG_TERM(buy: datetime, sell: datetime) -> bool: DATA_PATH = Path(BASE_PATH, "data") EXPORT_PATH = Path(BASE_PATH, "export") FIAT = FIAT_CLASS.name # Convert to string. 
+EXCHANGES = ["binance", "coinbasepro"] diff --git a/src/graph.py b/src/graph.py index 84c4e462..e5e513db 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,6 +1,7 @@ import collections import logging import time +import config from typing import Optional import ccxt @@ -8,6 +9,21 @@ log = logging.getLogger(__name__) +class RateLimit: + exchangedict = {} + + def limit(self, exchange): + if lastcall := self.exchangedict.get(exchange.id): + now = time.time() + delay = exchange.rateLimit / 1000 + timepassed = now - lastcall + if (waitfor := delay - timepassed) > 0: + time.sleep(waitfor) + self.exchangedict[exchange.id] = time.time() + else: + self.exchangedict[exchange.id] = time.time() + + class PricePath: def __init__( self, @@ -16,7 +32,7 @@ def __init__( cache: Optional[dict] = None, ): if exchanges is None: - exchanges = [] + exchanges = list(config.EXCHANGES) if gdict is None: gdict = {} if cache is None: @@ -24,10 +40,11 @@ def __init__( self.gdict = gdict self.cache = cache + self.RateLimit = RateLimit() # Saves the priority for a certain path so that bad paths can be skipped. 
self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) - allpairs: list[tuple[str, str, str, str]] = [] + allpairs: list(tuple[str, str, str, str]) = [] for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -89,15 +106,17 @@ def add_Edge(self, vrtx1, vrtx2, data): if vrtx1 in self.gdict: self.gdict[vrtx1].append((vrtx2, data)) else: - self.gdict[vrtx1] = [vrtx2] + self.gdict[vrtx1] = [ + (vrtx2, data), + ] def _get_path(self, start, stop, maxdepth, depth=0): """ - a recursive function for finding all possible paths between to edges + a recursive function for finding all possible paths between to vertices """ paths = [] if (edges := self.gdict.get(start)) and maxdepth > depth: - for edge in edges: + for edge in edges: # list of edges starting from the start vertice if depth == 0 and edge[0] == stop: paths.append([edge]) elif edge[0] == stop: @@ -203,9 +222,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1): if not cached: exchange_class = getattr(ccxt, path[i][1]["exchange"]) exchange = exchange_class() - # TODO maybe a more elaborate ratelimit wich removes execution - # time to from the ratelimit - time.sleep(exchange.rateLimit / 1000) + + self.RateLimit.limit(exchange) timeframeexchange = exchange.timeframes.get("1w") if ( timeframeexchange diff --git a/src/price_data.py b/src/price_data.py index 1e0f1766..5a54ef16 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -516,7 +516,7 @@ def get_candles( assert exchange.has["fetchOHLCV"] # time.sleep wants seconds - time.sleep(exchange.rateLimit / 1000) + self.path.RateLimit.limit(exchange) # Get candles 2 min before and after start/stop. 
since = start - 2 * 60 * 1000 @@ -607,7 +607,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: log.info( f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}" ) - path = self.path.getpath( + path = self.path.get_path( coin, reference_coin, first, last, preferredexchange=preferredexchange ) for p in path: From 98c049f0d8dfc2a8470e3163403cc2bf0242a87b Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Sun, 2 May 2021 10:28:10 +0200 Subject: [PATCH 39/53] change from list to set --- src/graph.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/graph.py b/src/graph.py index e5e513db..400337d6 100644 --- a/src/graph.py +++ b/src/graph.py @@ -44,7 +44,7 @@ def __init__( # Saves the priority for a certain path so that bad paths can be skipped. self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) - allpairs: list(tuple[str, str, str, str]) = [] + allpairs: set(tuple[str, str, str, str]) = set() for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -53,9 +53,11 @@ def __init__( assert isinstance(markets, list) if exchange.has["fetchOHLCV"]: - allpairs.extend( - [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets] - ) + toadd = [ + (i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets + ] + for pair in toadd: + allpairs.add(pair) else: logging.warning( f"{exchange.name} does not support fetch ohlcv. " @@ -65,7 +67,8 @@ def __init__( # Remove duplicate pairs. # TODO It might be faster to create it directly as set. # Is it even necessary to convert it to a list? - allpairs = list(set(allpairs)) + # allpairs = list(set(allpairs)) + allpairs = list(allpairs) # print("Total Pairs to check:", len(allpairs)) # Sorting by `symbol` to have the same result on every run due to the set. 
From 0808511554ea478c4f76104b064747e91c7df602 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Wed, 26 May 2021 17:00:54 +0200 Subject: [PATCH 40/53] fixed a bug which caused misses when looking up price_data --- src/price_data.py | 24 +++++++++++++----------- src/transaction.py | 1 + 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/price_data.py b/src/price_data.py index 5a54ef16..a7dcc2a4 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -536,7 +536,7 @@ def get_avg_candle_prices( ) -> list[tuple[int, decimal.Decimal]]: """Return average price from ohlcv candles. - The average price of the candle is calculated as the avergae from the + The average price of the candle is calculated as the average from the open and close price. Further information about candle-function can be found in `get_candles`. @@ -598,6 +598,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: ) datacomb = [] + for batch in time_batches: # ccxt works with timestamps in milliseconds first = misc.to_ms_timestamp(batch[0]) @@ -612,7 +613,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: ) for p in path: tempdatalis: list = [] - printstr = [a[1]["symbol"] for a in p[1]] + printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]] log.debug(f"found path over {' -> '.join(printstr)}") for i in range(len(p[1])): tempdatalis.append([]) @@ -692,13 +693,14 @@ def preload_prices( ) # Preload the prices. - data = self._get_bulk_pair_data_path( - missing_prices_operations, - coin, - reference_coin, - preferredexchange=platform, - ) + if missing_prices_operations: + data = self._get_bulk_pair_data_path( + missing_prices_operations, + coin, + reference_coin, + preferredexchange=platform, + ) - # TODO Use bulk insert to write all prices at once into the database. 
- for p in data: - self.set_price_db(platform, coin, reference_coin, p[0], p[1]) + # TODO Use bulk insert to write all prices at once into the database. + for p in data: + self.set_price_db(platform, coin, reference_coin, p[0], p[1]) diff --git a/src/transaction.py b/src/transaction.py index 6eebdd85..91db0159 100644 --- a/src/transaction.py +++ b/src/transaction.py @@ -189,3 +189,4 @@ def time_batches( if max_difference: max_time = timestamp + datetime.timedelta(minutes=max_difference) + yield batch # fixes bug where last batch ist not yielded From 6bda2145e528379b35edf28847079d1f16b2cc00 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Tue, 15 Jun 2021 13:38:46 +0200 Subject: [PATCH 41/53] fix ratelimit for kraken --- src/graph.py | 9 ++++++--- src/price_data.py | 10 +++++++++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/graph.py b/src/graph.py index 400337d6..ecbb28a2 100644 --- a/src/graph.py +++ b/src/graph.py @@ -16,9 +16,11 @@ def limit(self, exchange): if lastcall := self.exchangedict.get(exchange.id): now = time.time() delay = exchange.rateLimit / 1000 + if exchange.name == "Kraken": + delay += 2 # the reported ratelimit gets exceeded sometimes timepassed = now - lastcall if (waitfor := delay - timepassed) > 0: - time.sleep(waitfor) + time.sleep(waitfor + 0.5) self.exchangedict[exchange.id] = time.time() else: self.exchangedict[exchange.id] = time.time() @@ -153,7 +155,7 @@ def comb_sort_key(path): """ # prioritze pairs with the preferred exchange volume = 1 - volumenew = 0 + volumenew = 1 priority = self.priority.get("-".join([a[1]["symbol"] for a in path]), 0) pathlis = (a if (a := check_cache(pair)) else None for pair in path) for possiblepath in pathlis: @@ -163,7 +165,8 @@ def comb_sort_key(path): elif possiblepath[1][1]["avg_vol"] != 0: # is very much off because volume is not in the same # currency something for later - volumenew += possiblepath[1][1]["avg_vol"] + # volumenew*= volume 
of next thing in path (needs to be fixed for inverted paths) + volumenew *= possiblepath[1][1]["avg_vol"] else: break diff --git a/src/price_data.py b/src/price_data.py index a7dcc2a4..d89dfcba 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -526,8 +526,15 @@ def get_candles( # Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300). # `ccxt` throws an error if we exceed this limit. limit = math.ceil((stop - start) / (1000 * 60)) + 2 + try: + candles = exchange.fetch_ohlcv(symbol, "1m", since, limit) + except ccxt.RateLimitExceeded: + # sometimes the ratelimit gets exceeded for kraken dunno why + logging.warning("Ratelimit exceeded sleeping 10 seconds and retrying") + time.sleep(10) + self.path.RateLimit.limit(exchange) + candles = exchange.fetch_ohlcv(symbol, "1m", since, limit) - candles = exchange.fetch_ohlcv(symbol, "1m", since, limit) assert isinstance(candles, list) return candles @@ -611,6 +618,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: path = self.path.get_path( coin, reference_coin, first, last, preferredexchange=preferredexchange ) + # Todo Move the path calculation out of the for loop and only filter after time for p in path: tempdatalis: list = [] printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]] From 908043e706d79c2d43b2420da707f7f150830a1e Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Sun, 15 Aug 2021 09:42:36 +0200 Subject: [PATCH 42/53] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 22ad6820..011271fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ aiodns==2.0.0 aiohttp==3.7.4.post0 async-timeout==3.0.1 attrs==20.3.0 -ccxt==1.42.47 +ccxt==1.42.7 certifi==2020.12.5 cffi==1.14.5 chardet==4.0.0 From f0a52ca39c92aebb08280b703dd200fdfda1ded5 Mon Sep 17 00:00:00 2001 From: scientes 
<34819304+scientes@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:33:38 +0200 Subject: [PATCH 43/53] fix formatting --- src/graph.py | 12 +++++++----- src/price_data.py | 3 ++- src/transaction.py | 3 +-- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/graph.py b/src/graph.py index ecbb28a2..b30ad209 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,16 +1,17 @@ import collections import logging import time -import config -from typing import Optional +from typing import Dict, Optional import ccxt +import config + log = logging.getLogger(__name__) class RateLimit: - exchangedict = {} + exchangedict: Dict[str, int] = {} def limit(self, exchange): if lastcall := self.exchangedict.get(exchange.id): @@ -46,7 +47,7 @@ def __init__( # Saves the priority for a certain path so that bad paths can be skipped. self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) - allpairs: set(tuple[str, str, str, str]) = set() + allpairs: set[tuple[str, str, str, str]] = set() for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -165,7 +166,8 @@ def comb_sort_key(path): elif possiblepath[1][1]["avg_vol"] != 0: # is very much off because volume is not in the same # currency something for later - # volumenew*= volume of next thing in path (needs to be fixed for inverted paths) + # volumenew*= volume of next thing in path + # (needs to be fixed for inverted paths) volumenew *= possiblepath[1][1]["avg_vol"] else: diff --git a/src/price_data.py b/src/price_data.py index 1f187054..dc2edac4 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -767,7 +767,8 @@ def merge_prices(a: list, b: Optional[list] = None) -> list: path = self.path.get_path( coin, reference_coin, first, last, preferredexchange=preferredexchange ) - # Todo Move the path calculation out of the for loop and only filter after time + # Todo Move the path calculation out of the for loop + # and only filter after time for p in path: tempdatalis: list = [] 
printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]] diff --git a/src/transaction.py b/src/transaction.py index a3f4f52c..46c9af38 100644 --- a/src/transaction.py +++ b/src/transaction.py @@ -140,7 +140,6 @@ class TaxEvent: remark: str = "" - # Functions @@ -200,6 +199,7 @@ def time_batches( max_time = timestamp + datetime.timedelta(minutes=max_difference) yield batch # fixes bug where last batch ist not yielded + gain_operations = [ CoinLendEnd, StakingEnd, @@ -246,4 +246,3 @@ def key(op: Operation) -> tuple: return tuple([idx] + [getattr(op, key) for key in keys] if keys else []) return sorted(operations, key=key) - From c56ba999542bf672dd6d3b6604c6b5ac62dc698d Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Wed, 8 Sep 2021 10:39:04 +0200 Subject: [PATCH 44/53] fix formatting --- src/graph.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/graph.py b/src/graph.py index b30ad209..1f0717d6 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,7 +1,7 @@ import collections import logging import time -from typing import Dict, Optional +from typing import Dict, List, Optional, Tuple import ccxt @@ -47,7 +47,7 @@ def __init__( # Saves the priority for a certain path so that bad paths can be skipped. self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) - allpairs: set[tuple[str, str, str, str]] = set() + allpairs: set[Tuple[str, str, str, str]] = set() for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -71,13 +71,14 @@ def __init__( # TODO It might be faster to create it directly as set. # Is it even necessary to convert it to a list? # allpairs = list(set(allpairs)) - allpairs = list(allpairs) + allpairslist: List[Tuple[str, str, str, str]] = list(allpairs) + del allpairs # print("Total Pairs to check:", len(allpairs)) # Sorting by `symbol` to have the same result on every run due to the set. 
- allpairs.sort(key=lambda x: x[3]) + allpairslist.sort(key=lambda x: x[3]) - for base, quote, exchange, symbol in allpairs: + for base, quote, exchange, symbol in allpairslist: self.add_Vertex(base) self.add_Vertex(quote) self.add_Edge( From ada48598c37a859387d901c93037495f336a1c7d Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sun, 28 Nov 2021 09:45:30 +0100 Subject: [PATCH 45/53] UPDATE Use types for type hinting --- src/graph.py | 8 ++++---- src/misc.py | 5 ++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/graph.py b/src/graph.py index 1f0717d6..155af825 100644 --- a/src/graph.py +++ b/src/graph.py @@ -1,7 +1,7 @@ import collections import logging import time -from typing import Dict, List, Optional, Tuple +from typing import Optional import ccxt @@ -11,7 +11,7 @@ class RateLimit: - exchangedict: Dict[str, int] = {} + exchangedict: dict[str, int] = {} def limit(self, exchange): if lastcall := self.exchangedict.get(exchange.id): @@ -47,7 +47,7 @@ def __init__( # Saves the priority for a certain path so that bad paths can be skipped. self.priority: collections.defaultdict[str, int] = collections.defaultdict(int) - allpairs: set[Tuple[str, str, str, str]] = set() + allpairs: set[tuple[str, str, str, str]] = set() for exchange_id in exchanges: exchange_class = getattr(ccxt, exchange_id) @@ -71,7 +71,7 @@ def __init__( # TODO It might be faster to create it directly as set. # Is it even necessary to convert it to a list? 
# allpairs = list(set(allpairs)) - allpairslist: List[Tuple[str, str, str, str]] = list(allpairs) + allpairslist: list[tuple[str, str, str, str]] = list(allpairs) del allpairs # print("Total Pairs to check:", len(allpairs)) diff --git a/src/misc.py b/src/misc.py index cb4846ca..f8ca23f2 100644 --- a/src/misc.py +++ b/src/misc.py @@ -28,7 +28,6 @@ Optional, SupportsFloat, SupportsInt, - Tuple, TypeVar, Union, cast, @@ -122,7 +121,7 @@ def to_decimal_timestamp(d: datetime.datetime) -> decimal.Decimal: def get_offset_timestamps( utc_time: datetime.datetime, offset: datetime.timedelta, -) -> Tuple[int, int]: +) -> tuple[int, int]: """Return timestamps in milliseconds `offset/2` before/after `utc_time`. Args: @@ -130,7 +129,7 @@ def get_offset_timestamps( offset (datetime.timedelta) Returns: - Tuple[int, int]: Timestamps in milliseconds. + tuple[int, int]: Timestamps in milliseconds. """ start = utc_time - offset / 2 end = utc_time + offset / 2 From ee6fdcf573259de04051a0dd596aca90a7e6be3b Mon Sep 17 00:00:00 2001 From: Griffsano <18743559+Griffsano@users.noreply.github.com> Date: Sun, 26 Dec 2021 11:58:30 +0100 Subject: [PATCH 46/53] warning if exchange for CSV export is not found in CCXT list --- src/book.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/book.py b/src/book.py index b3ce0536..a0409826 100644 --- a/src/book.py +++ b/src/book.py @@ -856,6 +856,24 @@ def read_file(self, file_path: Path) -> None: log.info("Reading file from exchange %s at %s", exchange, file_path) read_file(file_path) + + cctx_mapping = { + "binance": "binance", + "binance_v2": "binance", + "coinbase": "coinbasepro", + "coinbase_pro": "coinbasepro", + "kraken_ledgers_old": "kraken", + "kraken_ledgers": "kraken", + "kraken_trades": "kraken", + "bitpanda_pro_trades": "bitpanda", + } + api = cctx_mapping.get(exchange) + + if api not in config.EXCHANGES: + log.warning( + f"Exchange `{api}` not found in EXCHANGES API list in config.py. 
" + "Consider adding it to obtain more accurate price data." + ) else: log.warning( f"Unable to detect the exchange of file `{file_path}`. " From bbd8bb81b9c2219d0c4c96e43d32ef3fc2312772 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Sun, 26 Dec 2021 13:46:11 +0100 Subject: [PATCH 47/53] formatting --- src/taxman.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/taxman.py b/src/taxman.py index 25dfa039..f1ce7809 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -251,23 +251,23 @@ def _evaluate_taxation_per_coin( def evaluate_taxation(self) -> None: """Evaluate the taxation using country specific function.""" log.debug("Starting evaluation...") - + if config.MULTI_DEPOT: # Evaluate taxation separated by platforms and coins. - for _, operations in misc.group_by( + for platform, operations in misc.group_by( self.book.operations, "platform" ).items(): for coin, _operations in misc.group_by(operations, "coin").items(): - self.price_data.preload_prices(_operations, coin, platform) + self.price_data.preload_prices(_operations, coin, platform) self._evaluate_taxation_per_coin(operations) else: - - for platform, _operations in misc.group_by(operations, "platform").items(): - for coin, coin_operations in misc.group_by(_operations, "coin").items(): - self.price_data.preload_prices(coin_operations, coin, platform) + + for plat, _ops in misc.group_by(self.book.operations, "platform").items(): + for coin, coin_operations in misc.group_by(_ops, "coin").items(): + self.price_data.preload_prices(coin_operations, coin, plat) # Evaluate taxation separated by coins in a single virtual depot. self._evaluate_taxation_per_coin(self.book.operations) - + def print_evaluation(self) -> None: """Print short summary of evaluation to stdout.""" # Summarize the tax evaluation. 
From 8f24604b973354030e4450185acd364d94b7d0c6 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 31 Dec 2021 16:04:29 +0100 Subject: [PATCH 48/53] added progress counter and sorted operations before fetching prices --- src/taxman.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/taxman.py b/src/taxman.py index f1ce7809..1c2a774c 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -251,20 +251,26 @@ def _evaluate_taxation_per_coin( def evaluate_taxation(self) -> None: """Evaluate the taxation using country specific function.""" log.debug("Starting evaluation...") + counter = 0 + total_operations = len(self.book.operations) + for plat, _ops in misc.group_by(self.book.operations, "platform").items(): + for coin, coin_operations in misc.group_by(_ops, "coin").items(): + s_operations = transaction.sort_operations( + coin_operations, ["utc_time"] + ) + self.price_data.preload_prices(s_operations, coin, plat) + counter += len(coin_operations) + log.info(f"{counter} out of {total_operations} operations processed.") + log.info(f"{counter/total_operations*100}% done") if config.MULTI_DEPOT: # Evaluate taxation separated by platforms and coins. for platform, operations in misc.group_by( self.book.operations, "platform" ).items(): - for coin, _operations in misc.group_by(operations, "coin").items(): - self.price_data.preload_prices(_operations, coin, platform) + self._evaluate_taxation_per_coin(operations) else: - - for plat, _ops in misc.group_by(self.book.operations, "platform").items(): - for coin, coin_operations in misc.group_by(_ops, "coin").items(): - self.price_data.preload_prices(coin_operations, coin, plat) # Evaluate taxation separated by coins in a single virtual depot. 
self._evaluate_taxation_per_coin(self.book.operations) From 84f7e866bb718eb56ff28be74231a26b02f3245b Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 31 Dec 2021 17:32:21 +0100 Subject: [PATCH 49/53] kraken ignore and warning and formatting --- src/graph.py | 7 ++++++- src/taxman.py | 2 +- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/graph.py b/src/graph.py index 155af825..6630396f 100644 --- a/src/graph.py +++ b/src/graph.py @@ -54,7 +54,12 @@ def __init__( exchange = exchange_class() markets = exchange.fetch_markets() assert isinstance(markets, list) - + if exchange_id == "kraken": + log.warning( + """Kraken is currently not supported due to only supporting + the last 720 candles of historic data""" + ) + continue if exchange.has["fetchOHLCV"]: toadd = [ (i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets diff --git a/src/taxman.py b/src/taxman.py index 1c2a774c..bab37a6d 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -265,7 +265,7 @@ def evaluate_taxation(self) -> None: if config.MULTI_DEPOT: # Evaluate taxation separated by platforms and coins. 
- for platform, operations in misc.group_by( + for _platform, operations in misc.group_by( self.book.operations, "platform" ).items(): From 6fc09486f12753d1952701194a1c2f6734eec509 Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 31 Dec 2021 17:33:54 +0100 Subject: [PATCH 50/53] formatting --- src/graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph.py b/src/graph.py index 6630396f..7c235e0c 100644 --- a/src/graph.py +++ b/src/graph.py @@ -56,7 +56,7 @@ def __init__( assert isinstance(markets, list) if exchange_id == "kraken": log.warning( - """Kraken is currently not supported due to only supporting + """Kraken is currently not supported due to only supporting the last 720 candles of historic data""" ) continue From 11d5849a9bc4c2d34048ed2c04a7ef7a4d0bcc24 Mon Sep 17 00:00:00 2001 From: Griffsano <18743559+Griffsano@users.noreply.github.com> Date: Mon, 10 Jan 2022 10:17:15 +0100 Subject: [PATCH 51/53] Ohlcv update (#4) * progress bar output: Reduced to one line * flake8 * variable and file naming * detailed warning if price already exists in database * do not preload prices for Kraken --- src/book.py | 6 +++--- src/price_data.py | 16 +++++++++++++--- src/taxman.py | 6 ++++-- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/src/book.py b/src/book.py index dae7ae8f..cb147ab5 100644 --- a/src/book.py +++ b/src/book.py @@ -858,7 +858,7 @@ def read_file(self, file_path: Path) -> None: log.info("Reading file from exchange %s at %s", exchange, file_path) read_file(file_path) - cctx_mapping = { + ccxt_mapping = { "binance": "binance", "binance_v2": "binance", "coinbase": "coinbasepro", @@ -868,11 +868,11 @@ def read_file(self, file_path: Path) -> None: "kraken_trades": "kraken", "bitpanda_pro_trades": "bitpanda", } - api = cctx_mapping.get(exchange) + api = ccxt_mapping.get(exchange) if api not in config.EXCHANGES: log.warning( - f"Exchange `{api}` not found in EXCHANGES API list 
in config.py. " + f"Exchange `{api}` not found in EXCHANGES API list in config.ini. " "Consider adding it to obtain more accurate price data." ) else: diff --git a/src/price_data.py b/src/price_data.py index a583cce8..82681677 100644 --- a/src/price_data.py +++ b/src/price_data.py @@ -594,10 +594,14 @@ def set_price_db( if str(e) == f"UNIQUE constraint failed: {tablename}.utc_time": price_db = self.get_price(platform, coin, utc_time, reference_coin) if price != price_db: + rel_error = abs(price - price_db) / price * 100 log.warning( - "Tried to write price to database, " - "but a different price exists already." - f"({platform=}, {tablename=}, {utc_time=}, {price=})" + f"Tried to write {tablename} price to database, but a " + f"different price exists already ({platform} @ {utc_time})" + ) + log.warning( + f"price: {price}, database price: {price_db}, " + f"relative error: %.6f %%", rel_error ) else: raise e @@ -942,6 +946,12 @@ def preload_prices( if not operations or coin == reference_coin: return + if platform == "kraken": + log.warning( + f"Will not preload prices for {platform}, reverting to default API." + ) + return + # Only consider the operations for which we have no prices in the database. missing_prices_operations = self.get_missing_price_operations( operations, coin, platform, reference_coin diff --git a/src/taxman.py b/src/taxman.py index bab37a6d..afed9b92 100644 --- a/src/taxman.py +++ b/src/taxman.py @@ -260,8 +260,10 @@ def evaluate_taxation(self) -> None: ) self.price_data.preload_prices(s_operations, coin, plat) counter += len(coin_operations) - log.info(f"{counter} out of {total_operations} operations processed.") - log.info(f"{counter/total_operations*100}% done") + log.info( + "{:6.2f} % done, {:6d} out of {:d} operations processed". + format(counter / total_operations * 100, counter, total_operations) + ) if config.MULTI_DEPOT: # Evaluate taxation separated by platforms and coins. 
From 080f475f76984a51124964bf90f527621497be8a Mon Sep 17 00:00:00 2001 From: scientes <34819304+scientes@users.noreply.github.com> Date: Fri, 28 Jan 2022 11:50:30 +0100 Subject: [PATCH 52/53] unpin ccxt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 011271fd..30ba4a41 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ aiodns==2.0.0 aiohttp==3.7.4.post0 async-timeout==3.0.1 attrs==20.3.0 -ccxt==1.42.7 +ccxt>=1.42.7 certifi==2020.12.5 cffi==1.14.5 chardet==4.0.0 From 4f2dfe74e25c61e7c2d7ee778333980346006611 Mon Sep 17 00:00:00 2001 From: Jeppy Date: Sun, 6 Feb 2022 13:17:07 +0100 Subject: [PATCH 53/53] UPDATE warning when ccxt mapping is missing for exchange - ADD comment --- src/book.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/book.py b/src/book.py index bd9dee41..7880f4eb 100644 --- a/src/book.py +++ b/src/book.py @@ -1132,6 +1132,9 @@ def read_file(self, file_path: Path) -> None: log.info("Reading file from exchange %s at %s", exchange, file_path) read_file(file_path) + # Check whether the given exchange is "supported" by our ccxt + # implementation, by comparing the platform with the listed + # ccxt exchanges in our config. ccxt_mapping = { "binance": "binance", "binance_v2": "binance", @@ -1143,8 +1146,12 @@ def read_file(self, file_path: Path) -> None: "bitpanda_pro_trades": "bitpanda", } api = ccxt_mapping.get(exchange) - - if api not in config.EXCHANGES: + if api is None: + log.warning( + f"The exchange {exchange} is not mapped to a ccxt exchange. " + "Please add the exchange to the ccxt_mapping dictionary." + ) + elif api not in config.EXCHANGES: log.warning( f"Exchange `{api}` not found in EXCHANGES API list in config.ini. " "Consider adding it to obtain more accurate price data."