From cd385df78972140a4aa78fc485935e2d5aeb958b Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 5 Mar 2021 18:06:40 +0100
Subject: [PATCH 01/53] added working price preloading using ccxt

---
 requirements.txt  |   1 +
 src/price_data.py | 109 ++++++++++++++++++++++++++++++++++++++++++++++
 src/taxman.py     |   3 ++
 3 files changed, 113 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index b70bcb89..542c7cb2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
+ccxt==1.42.47
 certifi==2020.12.5
 chardet==4.0.0
 idna==2.10
diff --git a/src/price_data.py b/src/price_data.py
index 51eb438c..c471b070 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -23,8 +23,10 @@
 import time
 from pathlib import Path
 from typing import Any, Optional, Union
+from time import sleep
 
 import requests
+import ccxt
 
 import config
 import misc
@@ -414,3 +416,110 @@ def get_cost(
         if isinstance(tr, transaction.SoldCoin):
             return price * tr.sold
         raise NotImplementedError
+    
+    def get_candles(self, start, stop, symbol):
+        if self.exchange.has['fetchOHLCV']:
+            sleep(self.exchange.rateLimit / 1000)  # time.sleep wants seconds
+            print(f"get {max(int((stop-start)/1000/60)+2,1)} rows")
+            # get 2min before and after range
+            return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1))
+        else:
+            raise Exception
+
+    def initialize_ccxt(self):
+        exchange_id = 'binance'
+        exchange_class = getattr(ccxt, exchange_id)
+        self.exchange = exchange_class()
+        self.markets = []
+        markets = self.exchange.fetch_markets()
+        for market in markets:
+            # may not apply for all exchanges, currently works for binance
+            self.markets.append(market["symbol"].split("/"))
+
+    def _get_binance_bulk_pair_list(self, reference_coin, coin):
+        def get_pair(coin, reference_coin):
+            for market in self.markets:
+                if market[0] == coin and market[1] == reference_coin:
+                    return [coin, reference_coin, False]
+                elif market[1] == coin and market[0] == reference_coin:
+                    return [reference_coin, coin, True]
+
+        pair = get_pair(coin, reference_coin)
+        if not pair:
+            for market in self.markets:
+                if market[0] == coin:
+                    pair = get_pair(market[1], reference_coin)
+                    if pair:
+                        return [[market[0], market[1], False], pair]
+                elif market[1] == coin:
+                    pair = get_pair(market[1], reference_coin)
+                    if pair:
+                        return [[market[1], market[0], True], pair]
+        return [pair, ]
+
+    def _get_binance_bulk_pair_data(self, operations, symbol, invert=False):
+        timestamps = []
+        timestamppairs = []
+        counter = 0
+        data = []
+        for op in operations:
+            timestamps.append(op.utc_time)
+        while len(timestamps) > 0:
+            timestamp = timestamps.pop(0)
+            if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp:
+                timestamppairs[-1].append(timestamp)
+            else:
+                timestamppairs.append([timestamp, ])
+        for batch in timestamppairs:
+            last = int(max(batch).timestamp() * 1000)
+            first = int(min(batch).timestamp() * 1000)
+            if invert:
+                tempdata = list(
+                    map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol)))
+            else:
+                tempdata = list(
+                    map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol)))
+            if tempdata:
+                for stamp in batch:
+                    # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
+                    # times do not always line up perfectly so take one nearest
+                    ts = list(
+                        map(lambda x: (abs(stamp.timestamp()*1000-x[0]), x), tempdata))
+                    print(min(ts, key=lambda x: x[0])[0]/1000)
+                    data.append((stamp, min(ts, key=lambda x: x[0])[1][1]))
+        return data
+
+    def preload_price_data(self, operations, coin):
+        
+        reference_coin = config.FIAT
+        lis = self._get_binance_bulk_pair_list(reference_coin, coin)
+        db_path = self.get_db_path("binance")
+        operations_filtered = []
+        tablename = self.get_tablename(coin, reference_coin)
+
+        if lis:
+            for operation in operations:
+                if not self.__get_price_db(db_path, tablename, operation.utc_time):
+                    operations_filtered.append(operation)
+            if len(lis) == 1 and lis[0]:
+                data = self._get_binance_bulk_pair_data(
+                    operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
+                for element in data:
+                    self.__set_price_db(db_path, tablename,
+                                        element[0], element[1])
+            elif len(lis) == 2 and lis[0] and lis[1]:
+                data = self._get_binance_bulk_pair_data(
+                    operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
+                data2 = self._get_binance_bulk_pair_data(
+                    operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2])
+                for element in data:
+                    factor = None
+                    for element2 in data2:
+                        if element[0] == element2[0]:
+                            factor = element2[1]
+                            break
+                    if factor:
+                        price = element[1]*factor
+                        if not self.__get_price_db(db_path, tablename, element[0]):
+                            self.__set_price_db(
+                                db_path, tablename, element[0], price)
diff --git a/src/taxman.py b/src/taxman.py
index 056ac3f0..58fd5aea 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -45,6 +45,8 @@ def __init__(self, book: Book, price_data: PriceData) -> None:
             self.__evaluate_taxation = getattr(self, f"_evaluate_taxation_{country}")
         except AttributeError:
             raise NotImplementedError(f"Unable to evaluate taxation for {country=}.")
+        
+        self.price_data.initialize_ccxt()
 
         if config.PRINCIPLE == core.Principle.FIFO:
             self.BalanceType = balance_queue.BalanceQueue
@@ -178,6 +180,7 @@ def evaluate_taxation(self) -> None:
         log.debug("Starting evaluation...")
         for coin, operations in misc.group_by(self.book.operations, "coin").items():
             operations = sorted(operations, key=lambda op: op.utc_time)
+            self.price_data.preload_price_data(operations,coin)
             self.__evaluate_taxation(coin, operations)
 
     def print_evaluation(self) -> None:

From 34f0c66245861b01b5a4b226ebbb8d17466f8c64 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 5 Mar 2021 18:32:58 +0100
Subject: [PATCH 02/53] reformatting and documentation

---
 src/price_data.py | 57 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 40 insertions(+), 17 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index c471b070..6ef65338 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -420,11 +420,12 @@ def get_cost(
     def get_candles(self, start, stop, symbol):
         if self.exchange.has['fetchOHLCV']:
             sleep(self.exchange.rateLimit / 1000)  # time.sleep wants seconds
-            print(f"get {max(int((stop-start)/1000/60)+2,1)} rows")
             # get 2min before and after range
             return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1))
         else:
-            raise Exception
+            logging.warning(
+                "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv")
+            raise None
 
     def initialize_ccxt(self):
         exchange_id = 'binance'
@@ -432,94 +433,116 @@ def initialize_ccxt(self):
         self.exchange = exchange_class()
         self.markets = []
         markets = self.exchange.fetch_markets()
+        
         for market in markets:
             # may not apply for all exchanges, currently works for binance
+            # caches a list of all pairs on the exchange
             self.markets.append(market["symbol"].split("/"))
 
     def _get_binance_bulk_pair_list(self, reference_coin, coin):
+
         def get_pair(coin, reference_coin):
+
             for market in self.markets:
                 if market[0] == coin and market[1] == reference_coin:
-                    return [coin, reference_coin, False]
+                    return [coin, reference_coin, False]  # False=not inverted
                 elif market[1] == coin and market[0] == reference_coin:
-                    return [reference_coin, coin, True]
+                    return [reference_coin, coin, True]  # True=inverted
 
         pair = get_pair(coin, reference_coin)
         if not pair:
+
             for market in self.markets:
-                if market[0] == coin:
-                    pair = get_pair(market[1], reference_coin)
-                    if pair:
+                pair = get_pair(market[1], reference_coin)
+
+                if pair:
+                    if market[0] == coin:
                         return [[market[0], market[1], False], pair]
-                elif market[1] == coin:
-                    pair = get_pair(market[1], reference_coin)
-                    if pair:
+                    elif market[1] == coin:
                         return [[market[1], market[0], True], pair]
-        return [pair, ]
+        else:
+            return [pair, ]
 
     def _get_binance_bulk_pair_data(self, operations, symbol, invert=False):
         timestamps = []
         timestamppairs = []
-        counter = 0
         data = []
+
         for op in operations:
             timestamps.append(op.utc_time)
+
         while len(timestamps) > 0:
             timestamp = timestamps.pop(0)
+
             if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp:
                 timestamppairs[-1].append(timestamp)
             else:
                 timestamppairs.append([timestamp, ])
+
         for batch in timestamppairs:
+            # ccxt works with timestamps in milliseconds
             last = int(max(batch).timestamp() * 1000)
             first = int(min(batch).timestamp() * 1000)
+
             if invert:
                 tempdata = list(
                     map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol)))
             else:
                 tempdata = list(
                     map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol)))
+
             if tempdata:
-                for stamp in batch:
+                for operation in batch:
                     # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
                     # times do not always line up perfectly so take one nearest
                     ts = list(
-                        map(lambda x: (abs(stamp.timestamp()*1000-x[0]), x), tempdata))
-                    print(min(ts, key=lambda x: x[0])[0]/1000)
-                    data.append((stamp, min(ts, key=lambda x: x[0])[1][1]))
+                        map(lambda x: (abs(operation.timestamp()*1000-x[0]), x), tempdata))
+                    data.append((operation, min(ts, key=lambda x: x[0])[1][1]))
         return data
 
     def preload_price_data(self, operations, coin):
-        
+
         reference_coin = config.FIAT
+        # get pairs used for calculating the price
         lis = self._get_binance_bulk_pair_list(reference_coin, coin)
         db_path = self.get_db_path("binance")
         operations_filtered = []
         tablename = self.get_tablename(coin, reference_coin)
 
         if lis:
+
             for operation in operations:
                 if not self.__get_price_db(db_path, tablename, operation.utc_time):
                     operations_filtered.append(operation)
+
+            # len 1== direct pairing with base currency
             if len(lis) == 1 and lis[0]:
                 data = self._get_binance_bulk_pair_data(
                     operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
                 for element in data:
                     self.__set_price_db(db_path, tablename,
                                         element[0], element[1])
+
+            # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR
             elif len(lis) == 2 and lis[0] and lis[1]:
+                # get data for first pair
                 data = self._get_binance_bulk_pair_data(
                     operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
+                # get data for second pair
                 data2 = self._get_binance_bulk_pair_data(
                     operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2])
+
                 for element in data:
                     factor = None
+
                     for element2 in data2:
                         if element[0] == element2[0]:
                             factor = element2[1]
                             break
+
                     if factor:
                         price = element[1]*factor
+                        # check if timestamp already exists to prevent a duplicate error
                         if not self.__get_price_db(db_path, tablename, element[0]):
                             self.__set_price_db(
                                 db_path, tablename, element[0], price)

From 777c44172785eeb0163d0cf0fde3e1e103f7fe33 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 5 Mar 2021 18:39:35 +0100
Subject: [PATCH 03/53] added type hints

---
 src/price_data.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 6ef65338..a435d255 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -417,7 +417,7 @@ def get_cost(
             return price * tr.sold
         raise NotImplementedError
     
-    def get_candles(self, start, stop, symbol):
+    def get_candles(self, start: int, stop: int, symbol: str) ->list:
         if self.exchange.has['fetchOHLCV']:
             sleep(self.exchange.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
@@ -433,15 +433,15 @@ def initialize_ccxt(self):
         self.exchange = exchange_class()
         self.markets = []
         markets = self.exchange.fetch_markets()
-        
+
         for market in markets:
             # may not apply for all exchanges, currently works for binance
             # caches a list of all pairs on the exchange
             self.markets.append(market["symbol"].split("/"))
 
-    def _get_binance_bulk_pair_list(self, reference_coin, coin):
+    def _get_binance_bulk_pair_list(self, reference_coin: str = config.FIAT, coin) -> list:
 
-        def get_pair(coin, reference_coin):
+        def get_pair(coin, reference_coin:str):
 
             for market in self.markets:
                 if market[0] == coin and market[1] == reference_coin:
@@ -463,7 +463,7 @@ def get_pair(coin, reference_coin):
         else:
             return [pair, ]
 
-    def _get_binance_bulk_pair_data(self, operations, symbol, invert=False):
+    def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list:
         timestamps = []
         timestamppairs = []
         data = []
@@ -500,7 +500,7 @@ def _get_binance_bulk_pair_data(self, operations, symbol, invert=False):
                     data.append((operation, min(ts, key=lambda x: x[0])[1][1]))
         return data
 
-    def preload_price_data(self, operations, coin):
+    def preload_price_data(self, operations: list, coin: str):
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price

From a1b9f8e6e8a09d84c9c2a48042001969cf55a81a Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 31 Mar 2021 15:18:36 +0200
Subject: [PATCH 04/53] added walrus operator

---
 src/price_data.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index a435d255..1347dcb0 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -449,9 +449,10 @@ def get_pair(coin, reference_coin:str):
                 elif market[1] == coin and market[0] == reference_coin:
                     return [reference_coin, coin, True]  # True=inverted
 
-        pair = get_pair(coin, reference_coin)
-        if not pair:
+        if pair := get_pair(coin, reference_coin)
+            return [pair, ]
 
+        else:
             for market in self.markets:
                 pair = get_pair(market[1], reference_coin)
 
@@ -460,8 +461,6 @@ def get_pair(coin, reference_coin:str):
                         return [[market[0], market[1], False], pair]
                     elif market[1] == coin:
                         return [[market[1], market[0], True], pair]
-        else:
-            return [pair, ]
 
     def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list:
         timestamps = []

From 5089a9e192aa84d189752a9be1134c8df69e7473 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 31 Mar 2021 15:42:32 +0200
Subject: [PATCH 05/53] fixes smaller issues

---
 src/price_data.py | 67 ++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 36 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 1347dcb0..d1f7ef47 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -439,50 +439,49 @@ def initialize_ccxt(self):
             # caches a list of all pairs on the exchange
             self.markets.append(market["symbol"].split("/"))
 
-    def _get_binance_bulk_pair_list(self, reference_coin: str = config.FIAT, coin) -> list:
-
+    def _get_bulk_pair_list(self, coin,reference_coin: str = config.FIAT) -> list:
+        def cmp_asset_pairs(our_pair: tuple[str, str], market_pair: tuple[str, str]) -> Optional[tuple[str, str, bool]]:
+            if our_pair == market_pair:
+                return *market_pair, False
+            if reversed(our_pair) == market_pair:
+                return *market_pair, True
+            return None
+        
         def get_pair(coin, reference_coin:str):
-
+            our_symbols = [coin, reference_coin]
             for market in self.markets:
-                if market[0] == coin and market[1] == reference_coin:
-                    return [coin, reference_coin, False]  # False=not inverted
-                elif market[1] == coin and market[0] == reference_coin:
-                    return [reference_coin, coin, True]  # True=inverted
+                if cmp := cmp_asset_pairs(our_symbols, market):
+                    return cmp 
 
-        if pair := get_pair(coin, reference_coin)
-            return [pair, ]
+        if pair := get_pair(coin, reference_coin):
+            return [pair]
 
         else:
             for market in self.markets:
-                pair = get_pair(market[1], reference_coin)
-
-                if pair:
+                if pair:=get_pair(market[1], reference_coin):
                     if market[0] == coin:
-                        return [[market[0], market[1], False], pair]
-                    elif market[1] == coin:
-                        return [[market[1], market[0], True], pair]
+                        return [(*market, False), pair]
+                    if market[1] == coin:
+                        return [(*market, True), pair]
 
-    def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list:
+    def _get_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list:
         timestamps = []
         timestamppairs = []
         data = []
 
-        for op in operations:
-            timestamps.append(op.utc_time)
+        timestamps = (op.utc_time for op in operations)
 
-        while len(timestamps) > 0:
-            timestamp = timestamps.pop(0)
+        for timestamp in timestamps:
 
             if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp:
                 timestamppairs[-1].append(timestamp)
             else:
-                timestamppairs.append([timestamp, ])
+                timestamppairs.append([timestamp])
 
         for batch in timestamppairs:
             # ccxt works with timestamps in milliseconds
-            last = int(max(batch).timestamp() * 1000)
-            first = int(min(batch).timestamp() * 1000)
-
+            first = misc.to_ms_timestamp(batch[0])
+            last = misc.to_ms_timestamp(batch[-1])
             if invert:
                 tempdata = list(
                     map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol)))
@@ -495,7 +494,7 @@ def _get_binance_bulk_pair_data(self, operations: list, symbol: str, invert: str
                     # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
                     # times do not always line up perfectly so take one nearest
                     ts = list(
-                        map(lambda x: (abs(operation.timestamp()*1000-x[0]), x), tempdata))
+                        map(lambda x: (abs(misc.to_ms_timestamp(operation.timestamp)*1000-x[0]), x), tempdata))
                     data.append((operation, min(ts, key=lambda x: x[0])[1][1]))
         return data
 
@@ -503,20 +502,17 @@ def preload_price_data(self, operations: list, coin: str):
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price
-        lis = self._get_binance_bulk_pair_list(reference_coin, coin)
         db_path = self.get_db_path("binance")
         operations_filtered = []
         tablename = self.get_tablename(coin, reference_coin)
 
-        if lis:
+        if lis:=self._get_bulk_pair_list(coin,reference_coin):
 
-            for operation in operations:
-                if not self.__get_price_db(db_path, tablename, operation.utc_time):
-                    operations_filtered.append(operation)
+            operations_filtered = [op for op in operations if not self.__get_price_db(db_path, tablename, op.utc_time)]
 
             # len 1== direct pairing with base currency
             if len(lis) == 1 and lis[0]:
-                data = self._get_binance_bulk_pair_data(
+                data = self._get_bulk_pair_data(
                     operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
                 for element in data:
                     self.__set_price_db(db_path, tablename,
@@ -525,10 +521,10 @@ def preload_price_data(self, operations: list, coin: str):
             # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR
             elif len(lis) == 2 and lis[0] and lis[1]:
                 # get data for first pair
-                data = self._get_binance_bulk_pair_data(
+                data = self._get_bulk_pair_data(
                     operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
                 # get data for second pair
-                data2 = self._get_binance_bulk_pair_data(
+                data2 = self._get_bulk_pair_data(
                     operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2])
 
                 for element in data:
@@ -542,6 +538,5 @@ def preload_price_data(self, operations: list, coin: str):
                     if factor:
                         price = element[1]*factor
                         # check if timestamp already exists to prevent a duplicate error
-                        if not self.__get_price_db(db_path, tablename, element[0]):
-                            self.__set_price_db(
-                                db_path, tablename, element[0], price)
+                        self.set_price_db(
+                            db_path, tablename, element[0], price)

From 1ac3e7769fdbd2f244be482b1c709967a35b1c03 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 31 Mar 2021 15:48:27 +0200
Subject: [PATCH 06/53] change warning to error

---
 src/price_data.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index d1f7ef47..54c4708d 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -423,9 +423,9 @@ def get_candles(self, start: int, stop: int, symbol: str) ->list:
             # get 2min before and after range
             return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1))
         else:
-            logging.warning(
+            logging.error(
                 "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv")
-            raise None
+            return None
 
     def initialize_ccxt(self):
         exchange_id = 'binance'

From ed50892286cd9ceee9ea449e5e2b69f8f8ac6a58 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Sun, 4 Apr 2021 22:11:21 +0200
Subject: [PATCH 07/53] PoC for a graph based solution

---
 src/graph.py | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 src/graph.py

diff --git a/src/graph.py b/src/graph.py
new file mode 100644
index 00000000..77076857
--- /dev/null
+++ b/src/graph.py
@@ -0,0 +1,195 @@
+import ccxt
+from datetime import datetime
+from time import sleep, time_ns
+
+
+class graph:
+
+    def __init__(self, gdict=None, cache=None):
+        if not gdict:
+            gdict = {}
+        if not cache:
+            cache = {}
+        self.gdict = gdict
+        self.cache = cache
+
+    def edges(self):
+        return self.findedges()
+# Find the distinct list of edges
+
+    def findedges(self):
+        edgename = []
+        for vrtx in self.gdict:
+            for nxtvrtx in self.gdict[vrtx]:
+                if {nxtvrtx, vrtx} not in edgename:
+                    edgename.append({vrtx, nxtvrtx})
+        return edgename
+
+    def getVertices(self):
+        return list(self.gdict.keys())
+
+# Add the vertex as a key
+    def addVertex(self, vrtx):
+       if vrtx not in self.gdict:
+           self.gdict[vrtx] = []
+
+    def addEdge(self, vrtx1, vrtx2, data):
+        if vrtx1 in self.gdict:
+            self.gdict[vrtx1].append((vrtx2, data))
+        else:
+            self.gdict[vrtx1] = [vrtx2]
+
+    def _getpath(self, start, stop, maxdepth, depth=0):
+        paths = []
+        if (edges := g.gdict.get(start)) and maxdepth > depth:
+            for edge in edges:
+                if depth == 0 and edge[0] == stop:
+                    paths.append([edge, ])
+                elif edge[0] == stop:
+                    paths.append(edge)
+                else:
+                    path = self._getpath(
+                        edge[0], stop, maxdepth, depth=depth+1)
+                    if len(path) and path is not None:
+                        for p in path:
+                            if p[0] == stop:
+                                newpath = [edge, ]
+                                newpath.append(p)
+                                paths.append(newpath)
+        #if len(paths)>3 and depth in [0,1]:
+        #    print(len(paths))
+        return paths
+
+    def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3):
+        def comb_sort_key(path):
+            if preferredexchange:
+                # prioritze pairs with the preferred exchange
+                return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])
+            else:
+                return len(path)
+
+        def check_cache(pair):
+
+            if pair[1].get("starttime") or pair[1].get("stoptime"):
+                return True, pair
+            if cacheres := self.cache.get(pair[1]["exchange"]+pair[1]["symbol"]):
+                pair[1]["starttime"] = cacheres[0]
+                pair[1]["stoptime"] = cacheres[1]
+                pair[1]["avg_vol"] = cacheres[2]
+                return True, pair
+            return False, pair
+
+        def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
+            rangeinms = 0
+            timeframe = int(6.048e+8)  # week in ms
+            if starttimestamp == 0:
+                starttimestamp = 1325372400*1000
+            if stoptimestamp == -1:
+                stoptimestamp = time_ns() // 1_000_000  # get cur time in ms
+            starttimestamp -= timeframe  # to handle edge cases
+            if stoptimestamp > starttimestamp:
+                rangeinms = stoptimestamp-starttimestamp
+            else:
+                rangeinms = 0  # maybe throw error
+
+            # add one candle to the end to ensure the needed timeslot is in the requested candles
+            rangeincandles = int(rangeinms/timeframe)+1
+
+            #todo: cache already used pairs
+            globalstarttime = 0
+            globalstoptime = 0
+            for i in range(len(path)):
+                cached, path[i] = check_cache(path[i])
+                if not cached:
+                    exchange_class = getattr(ccxt, path[i][1]["exchange"])
+                    exchange = exchange_class()
+                    sleep(exchange.rateLimit / 1000)
+                    timeframeexchange = exchange.timeframes.get("1w")
+                    if timeframeexchange:  # this must be handled better maybe choose timeframe dynamically
+                        # maybe cache this per pair
+                        ohlcv = exchange.fetch_ohlcv(
+                            path[i][1]["symbol"], "1w", starttimestamp, rangeincandles)
+                    else:
+                        ohlcv = []  # do not check fail later
+                    if len(ohlcv) > 1:
+                        # (candle ends after the date + timeframe)
+                        path[i][1]["stoptime"] = ohlcv[-1][0]+timeframe
+                        path[i][1]["avg_vol"] = sum(
+                            [vol[-1] for vol in ohlcv])/len(ohlcv)  # avg vol in curr
+                        path[i][1]["starttime"] = ohlcv[0][0]
+                        if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0:
+                            globalstoptime = path[i][1]["stoptime"]
+                        if path[i][1]["starttime"] > globalstarttime:
+                            globalstarttime = path[i][1]["starttime"]
+                    else:
+                        path[i][1]["stoptime"] = 0
+                        path[i][1]["starttime"] = 0
+                        path[i][1]["avg_vol"] = 0
+                    self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = (
+                        path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"])
+                else:
+                    if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0:
+                        globalstoptime = path[i][1]["stoptime"]
+                    if path[i][1]["starttime"] > globalstarttime:
+                        globalstarttime = path[i][1]["starttime"]
+                    ohlcv = []
+                print(len(ohlcv)-rangeincandles, rangeincandles)
+            return (globalstarttime, globalstoptime), path
+
+        # get all possible paths which are no longer than 4 pairs long
+        paths = self._getpath(start, stop, maxdepth)
+        # sort by path length to get minimal conversion chain to reduce error
+        paths = sorted(paths, key=comb_sort_key)
+        #get timeframe in which a path is viable 
+        for path in paths:
+            timest, newpath = get_active_timeframe(path)
+            # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. if the first
+            if starttime == 0 and stoptime == 0:
+                yield timest, newpath
+            elif starttime == 0:
+                if stoptime < timest[1]:
+                    yield timest, newpath
+            elif stoptime == 0:
+                if starttime > timest[0]:
+                    yield timest, newpath
+            else:
+                if stoptime < timest[1] and starttime > timest[0]:
+                    yield timest, newpath
+
+
+if __name__ == "__main__":
+    g = graph()
+    allpairs = []
+    for exchange_id in ["binance", "coinbase", "kraken", "coinbasepro", "aax", "bittrex", "bitvavo"]:
+        exchange_class = getattr(ccxt, exchange_id)
+        exchange = exchange_class()
+        markets = []
+        markets = exchange.fetch_markets()
+        if exchange.has['fetchOHLCV']:
+
+            allpairs.extend(
+                [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets])
+        else:
+            print(
+                f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.")
+        #print(len([(i["base"],i["quote"],exchange_id,i["symbol"])for i in markets]),len(markets))
+    allpairs = list(set(allpairs))
+    print("Total Pairs to check:", len(allpairs))
+    for i in allpairs:
+        base = i[0]
+        quote = i[1]
+        g.addVertex(base)
+        g.addVertex(quote)
+        g.addEdge(base, quote, {
+                  "exchange": i[2], "symbol": i[3], "inverted": False})
+        g.addEdge(quote, base, {
+                  "exchange": i[2], "symbol": i[3], "inverted": True})
+
+    start = "IOTA"
+    to = "EUR"
+    preferredexchange = "binance"
+    path = g.getpath(start, to, maxdepth=2,
+                     preferredexchange=preferredexchange)
+    #debug only in actual use we would iterate over the path object fetching new paths as needed
+    path = list(path)
+    print(len(path))

From 657666baa7e2dfd3cade03ce2d3d456246cc1740 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 09:42:32 +0200
Subject: [PATCH 08/53] working graph implementation

---
 src/balance_queue.py |   2 +-
 src/graph.py         |  98 ++++++++++++++--------
 src/log_config.py    |   1 +
 src/price_data.py    | 194 +++++++++++++++++++++----------------------
 src/taxman.py        |   4 +-
 5 files changed, 163 insertions(+), 136 deletions(-)

diff --git a/src/balance_queue.py b/src/balance_queue.py
index a8c8594e..d98aa942 100644
--- a/src/balance_queue.py
+++ b/src/balance_queue.py
@@ -125,7 +125,7 @@ def sell(self, change: decimal.Decimal) -> Optional[list[transaction.SoldCoin]]:
                 return None
 
             not_sold = bop.op.change - bop.sold
-            assert not_sold > 0
+            assert not_sold >= 0
 
             if not_sold > change:
                 bop.sold += change
diff --git a/src/graph.py b/src/graph.py
index 77076857..fe80af69 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -3,15 +3,44 @@
 from time import sleep, time_ns
 
 
-class graph:
+class PricePath:
 
-    def __init__(self, gdict=None, cache=None):
+    def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None):
         if not gdict:
             gdict = {}
         if not cache:
             cache = {}
+        if not exchanges:
+            exchanges = ["binance","coinbasepro"]
         self.gdict = gdict
         self.cache = cache
+        self.priority={}
+        allpairs=[]
+
+        for exchange_id in exchanges:
+            exchange_class = getattr(ccxt, exchange_id)
+            exchange = exchange_class()
+            markets = []
+            markets = exchange.fetch_markets()
+            if exchange.has['fetchOHLCV']:
+
+                allpairs.extend(
+                    [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets])
+            else:
+                print(
+                    f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.")
+        allpairs = list(set(allpairs))
+        #print("Total Pairs to check:", len(allpairs))
+        allpairs.sort(key=lambda x: x[3])
+        for i in allpairs:
+            base = i[0]
+            quote = i[1]
+            self.addVertex(base)
+            self.addVertex(quote)
+            self.addEdge(base, quote, {
+                    "exchange": i[2], "symbol": i[3], "inverted": False})
+            self.addEdge(quote, base, {
+                    "exchange": i[2], "symbol": i[3], "inverted": True})
 
     def edges(self):
         return self.findedges()
@@ -41,7 +70,7 @@ def addEdge(self, vrtx1, vrtx2, data):
 
     def _getpath(self, start, stop, maxdepth, depth=0):
         paths = []
-        if (edges := g.gdict.get(start)) and maxdepth > depth:
+        if (edges := self.gdict.get(start)) and maxdepth > depth:
             for edge in edges:
                 if depth == 0 and edge[0] == stop:
                     paths.append([edge, ])
@@ -56,15 +85,36 @@ def _getpath(self, start, stop, maxdepth, depth=0):
                                 newpath = [edge, ]
                                 newpath.append(p)
                                 paths.append(newpath)
-        #if len(paths)>3 and depth in [0,1]:
-        #    print(len(paths))
         return paths
 
+    def change_prio(self,key,value):
+        ke="-".join(key)
+        if self.priority.get(ke):
+            self.priority[ke]+=value
+        else:
+            self.priority[ke]=value
+
     def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3):
         def comb_sort_key(path):
             if preferredexchange:
                 # prioritze pairs with the preferred exchange
-                return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])
+                volume=1
+                volumenew=0
+                if not (priority:=self.priority.get("-".join([ a[1]["symbol"] for a in  path]))):
+                    priority=0
+                for c in [a if (a := check_cache(pair)) else None for pair in path]:
+                    if c and c[0]:
+                        if c[1][1]["stoptime"]==0:
+                            break
+                        elif c[1][1]["avg_vol"]!=0:
+                            volumenew+=c[1][1]["avg_vol"] #is very much off because volume is not in the same currency something for later
+
+
+                    else:
+                        break
+                else:
+                    volume=1/volumenew
+                return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])+volume+priority
             else:
                 return len(path)
 
@@ -103,7 +153,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                 if not cached:
                     exchange_class = getattr(ccxt, path[i][1]["exchange"])
                     exchange = exchange_class()
-                    sleep(exchange.rateLimit / 1000)
+                    sleep(exchange.rateLimit / 1000) #maybe a more elaborate ratelimit wich counts execution time to waiting
                     timeframeexchange = exchange.timeframes.get("1w")
                     if timeframeexchange:  # this must be handled better maybe choose timeframe dynamically
                         # maybe cache this per pair
@@ -128,12 +178,12 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                     self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = (
                         path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"])
                 else:
-                    if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0:
+
+                    if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"]!=0:
                         globalstoptime = path[i][1]["stoptime"]
-                    if path[i][1]["starttime"] > globalstarttime:
+                    if path[i][1]["starttime"] > globalstarttime :
                         globalstarttime = path[i][1]["starttime"]
                     ohlcv = []
-                print(len(ohlcv)-rangeincandles, rangeincandles)
             return (globalstarttime, globalstoptime), path
 
         # get all possible paths which are no longer than 4 pairs long
@@ -152,39 +202,15 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             elif stoptime == 0:
                 if starttime > timest[0]:
                     yield timest, newpath
+            
             else:
                 if stoptime < timest[1] and starttime > timest[0]:
                     yield timest, newpath
 
 
 if __name__ == "__main__":
-    g = graph()
+    g = PricePath()
     allpairs = []
-    for exchange_id in ["binance", "coinbase", "kraken", "coinbasepro", "aax", "bittrex", "bitvavo"]:
-        exchange_class = getattr(ccxt, exchange_id)
-        exchange = exchange_class()
-        markets = []
-        markets = exchange.fetch_markets()
-        if exchange.has['fetchOHLCV']:
-
-            allpairs.extend(
-                [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets])
-        else:
-            print(
-                f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.")
-        #print(len([(i["base"],i["quote"],exchange_id,i["symbol"])for i in markets]),len(markets))
-    allpairs = list(set(allpairs))
-    print("Total Pairs to check:", len(allpairs))
-    for i in allpairs:
-        base = i[0]
-        quote = i[1]
-        g.addVertex(base)
-        g.addVertex(quote)
-        g.addEdge(base, quote, {
-                  "exchange": i[2], "symbol": i[3], "inverted": False})
-        g.addEdge(quote, base, {
-                  "exchange": i[2], "symbol": i[3], "inverted": True})
-
     start = "IOTA"
     to = "EUR"
     preferredexchange = "binance"
diff --git a/src/log_config.py b/src/log_config.py
index 9cf94aca..d480c108 100644
--- a/src/log_config.py
+++ b/src/log_config.py
@@ -35,3 +35,4 @@
 
 # Disable urllib debug messages
 logging.getLogger("urllib3").propagate = False
+logging.getLogger("ccxt").propagate = False
diff --git a/src/price_data.py b/src/price_data.py
index 54c4708d..268802a7 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -32,6 +32,7 @@
 import misc
 import transaction
 from core import kraken_pair_map
+from graph import PricePath
 
 log = logging.getLogger(__name__)
 
@@ -43,6 +44,9 @@
 
 
 class PriceData:
+    def __init__(self):
+        self.path=PricePath()
+
     def get_db_path(self, platform: str) -> Path:
         return Path(config.DATA_PATH, f"{platform}.db")
 
@@ -417,126 +421,122 @@ def get_cost(
             return price * tr.sold
         raise NotImplementedError
     
-    def get_candles(self, start: int, stop: int, symbol: str) ->list:
-        if self.exchange.has['fetchOHLCV']:
-            sleep(self.exchange.rateLimit / 1000)  # time.sleep wants seconds
+    def get_candles(self, start: int, stop: int, symbol: str,exchange: str) ->list:
+        exchange_class = getattr(ccxt, exchange)
+        exchange = exchange_class()
+        if exchange.has['fetchOHLCV']:
+            sleep(exchange.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
-            return self.exchange.fetch_ohlcv(symbol, '1m', start-1000*60*2, max(int((stop-start)/1000/60)+5, 1))
+            startval=start-1000*60*2
+            rang=max(int((stop-start)/1000/60)+2, 1)
+            return exchange.fetch_ohlcv(symbol, '1m', startval, rang )
         else:
-            logging.error(
+            log.error(
                 "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv")
             return None
 
-    def initialize_ccxt(self):
-        exchange_id = 'binance'
-        exchange_class = getattr(ccxt, exchange_id)
-        self.exchange = exchange_class()
-        self.markets = []
-        markets = self.exchange.fetch_markets()
-
-        for market in markets:
-            # may not apply for all exchanges, currently works for binance
-            # caches a list of all pairs on the exchange
-            self.markets.append(market["symbol"].split("/"))
-
-    def _get_bulk_pair_list(self, coin,reference_coin: str = config.FIAT) -> list:
-        def cmp_asset_pairs(our_pair: tuple[str, str], market_pair: tuple[str, str]) -> Optional[tuple[str, str, bool]]:
-            if our_pair == market_pair:
-                return *market_pair, False
-            if reversed(our_pair) == market_pair:
-                return *market_pair, True
-            return None
-        
-        def get_pair(coin, reference_coin:str):
-            our_symbols = [coin, reference_coin]
-            for market in self.markets:
-                if cmp := cmp_asset_pairs(our_symbols, market):
-                    return cmp 
-
-        if pair := get_pair(coin, reference_coin):
-            return [pair]
+    def _get_bulk_pair_data_path(self, operations: list, coin: str,reference_coin: str,preferredexchange:str="binance") ->list:
+        def merge_prices(a:list,b:list=None):
+            prices=[]
+            if not b :
+                return a
+            for i in a:
+                factor=None
+                for j in b:
+                    if i[0]==j[0]:
+                        factor=j[1]
+                        break
+                prices.append((i[0],i[1]*factor))
+            return prices
 
-        else:
-            for market in self.markets:
-                if pair:=get_pair(market[1], reference_coin):
-                    if market[0] == coin:
-                        return [(*market, False), pair]
-                    if market[1] == coin:
-                        return [(*market, True), pair]
-
-    def _get_bulk_pair_data(self, operations: list, symbol: str, invert: str=False) ->list:
         timestamps = []
         timestamppairs = []
-        data = []
-
+        maxminutes=300 #coinbasepro only allows a max of 300 minutes need a better solution
         timestamps = (op.utc_time for op in operations)
+        if not preferredexchange:
+            preferredexchange="binance"
 
         for timestamp in timestamps:
 
-            if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=995) > timestamp:
+            if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=maxminutes-4) > timestamp:
                 timestamppairs[-1].append(timestamp)
             else:
                 timestamppairs.append([timestamp])
-
+        datacomb=[]
         for batch in timestamppairs:
             # ccxt works with timestamps in milliseconds
             first = misc.to_ms_timestamp(batch[0])
             last = misc.to_ms_timestamp(batch[-1])
-            if invert:
-                tempdata = list(
-                    map(lambda x: (x[0], 1/((x[1]+x[4])/2)), self.get_candles(first, last, symbol)))
-            else:
-                tempdata = list(
-                    map(lambda x: (x[0], (x[1]+x[4])/2), self.get_candles(first, last, symbol)))
-
-            if tempdata:
-                for operation in batch:
-                    # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
-                    # times do not always line up perfectly so take one nearest
-                    ts = list(
-                        map(lambda x: (abs(misc.to_ms_timestamp(operation.timestamp)*1000-x[0]), x), tempdata))
-                    data.append((operation, min(ts, key=lambda x: x[0])[1][1]))
-        return data
-
-    def preload_price_data(self, operations: list, coin: str):
+            firststr=batch[0].strftime('%d-%b-%Y (%H:%M)')
+            laststr=batch[-1].strftime('%d-%b-%Y (%H:%M)')
+            log.info(f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}")
+            path=self.path.getpath(coin,reference_coin,first,last,preferredexchange=preferredexchange)
+            for p in path:
+                tempdatalis=[]
+                printstr=[ a[1]["symbol"] for a in  p[1] ]
+                log.debug(f"found path over {' -> '.join(printstr)}")
+                for i in range(len(p[1])):
+                    tempdatalis.append([])
+                    symbol=p[1][i][1]["symbol"]
+                    exchange=p[1][i][1]["exchange"]
+                    invert=p[1][i][1]["inverted"]
+                    candles=self.get_candles(first, last, symbol,exchange)
+                    if invert:
+                        tempdata = list(
+                            map(lambda x: (x[0], 1/((x[1]+x[4])/2)), candles))
+                    else:
+                        tempdata = list(
+                            map(lambda x: (x[0], (x[1]+x[4])/2), candles))
+
+                    if tempdata:
+                        for operation in batch:
+                            # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
+                            # times do not always line up perfectly so take one nearest
+                            ts = list(
+                                map(lambda x: (abs(misc.to_ms_timestamp(operation)*1000-x[0]), x), tempdata))
+                            tempdatalis[i].append((operation, min(ts, key=lambda x: x[0])[1][1]))
+                    else:
+                        tempdatalis=[]
+                        self.path.change_prio(printstr,0.2) # do not try already failed again
+                        break
+                if tempdatalis:
+                    wantedlen=len(tempdatalis[0])
+                    for li in tempdatalis:
+                        if not len(li)==wantedlen:
+                            self.path.change_prio(printstr,0.2)
+                            break
+                    else:
+                        prices=[]
+                        for d in tempdatalis:
+                            prices=merge_prices(d,prices)
+                        datacomb.extend(prices)
+                        break
+                log.debug("path failed trying new path")
+     
+        return datacomb
+
+    def preload_price_data_path(self,operations: list, coin: str,exchange:str=None):
+        
+            
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price
-        db_path = self.get_db_path("binance")
         operations_filtered = []
+        
         tablename = self.get_tablename(coin, reference_coin)
+        operations_filtered = [op for op in operations if not self.__get_price_db(self.get_db_path(op.platform), tablename, op.utc_time)]
+        operations_grouped={}
+        if operations_filtered:
+            for i in operations_filtered:
+                if i.coin==config.FIAT:
+                    pass
+                elif operations_grouped.get(i.platform):
+                    operations_grouped[i.platform].append(i)
+                else:
+                    operations_grouped[i.platform]=[i]
+            for platf in operations_grouped.keys():
+                data=self._get_bulk_pair_data_path(operations_grouped[platf],coin,reference_coin,preferredexchange=platf)
+                for p in data:
+                    self.set_price_db(platf,coin,reference_coin, p[0], p[1])
 
-        if lis:=self._get_bulk_pair_list(coin,reference_coin):
-
-            operations_filtered = [op for op in operations if not self.__get_price_db(db_path, tablename, op.utc_time)]
-
-            # len 1== direct pairing with base currency
-            if len(lis) == 1 and lis[0]:
-                data = self._get_bulk_pair_data(
-                    operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
-                for element in data:
-                    self.__set_price_db(db_path, tablename,
-                                        element[0], element[1])
-
-            # len 2 == calculates price using two pairs e.g IOTA/ETH + ETH/EUR
-            elif len(lis) == 2 and lis[0] and lis[1]:
-                # get data for first pair
-                data = self._get_bulk_pair_data(
-                    operations_filtered, f"{lis[0][0]}/{lis[0][1]}", lis[0][2])
-                # get data for second pair
-                data2 = self._get_bulk_pair_data(
-                    operations_filtered, f"{lis[1][0]}/{lis[1][1]}", lis[1][2])
-
-                for element in data:
-                    factor = None
-
-                    for element2 in data2:
-                        if element[0] == element2[0]:
-                            factor = element2[1]
-                            break
 
-                    if factor:
-                        price = element[1]*factor
-                        # check if timestamp already exists to prevent a duplicate error
-                        self.set_price_db(
-                            db_path, tablename, element[0], price)
diff --git a/src/taxman.py b/src/taxman.py
index 58fd5aea..1e3e7e75 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -46,7 +46,7 @@ def __init__(self, book: Book, price_data: PriceData) -> None:
         except AttributeError:
             raise NotImplementedError(f"Unable to evaluate taxation for {country=}.")
         
-        self.price_data.initialize_ccxt()
+        
 
         if config.PRINCIPLE == core.Principle.FIFO:
             self.BalanceType = balance_queue.BalanceQueue
@@ -180,7 +180,7 @@ def evaluate_taxation(self) -> None:
         log.debug("Starting evaluation...")
         for coin, operations in misc.group_by(self.book.operations, "coin").items():
             operations = sorted(operations, key=lambda op: op.utc_time)
-            self.price_data.preload_price_data(operations,coin)
+            self.price_data.preload_price_data_path(operations,coin)
             self.__evaluate_taxation(coin, operations)
 
     def print_evaluation(self) -> None:

From 74935e1713f1260af3e539c43000a92540c7f795 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 10:03:42 +0200
Subject: [PATCH 09/53] track the first timestamp of the current batch explicitly

---
 src/price_data.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 268802a7..f8b4d9f6 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -456,11 +456,12 @@ def merge_prices(a:list,b:list=None):
         if not preferredexchange:
             preferredexchange="binance"
 
+        current_first = None
         for timestamp in timestamps:
-
-            if len(timestamppairs) > 0 and timestamppairs[-1][0]+datetime.timedelta(minutes=maxminutes-4) > timestamp:
+            if current_first and current_first+datetime.timedelta(minutes=maxminutes-4) > timestamp:
                 timestamppairs[-1].append(timestamp)
             else:
+                current_first = timestamp
                 timestamppairs.append([timestamp])
         datacomb=[]
         for batch in timestamppairs:

From 48c24c8ce447b609a3c01fc3b5cb393d0275a5dc Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 10:18:14 +0200
Subject: [PATCH 10/53] revert accidental assert change in balance_queue.py

---
 src/balance_queue.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/balance_queue.py b/src/balance_queue.py
index d98aa942..a8c8594e 100644
--- a/src/balance_queue.py
+++ b/src/balance_queue.py
@@ -125,7 +125,7 @@ def sell(self, change: decimal.Decimal) -> Optional[list[transaction.SoldCoin]]:
                 return None
 
             not_sold = bop.op.change - bop.sold
-            assert not_sold >= 0
+            assert not_sold > 0
 
             if not_sold > change:
                 bop.sold += change

From 50378a6997e5fa68edaf21a8d8874fc4f44706b4 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 11:50:30 +0200
Subject: [PATCH 11/53] whitespace formatting of graph.py and price_data.py

---
 src/graph.py      |  75 ++++++++++++++++----------------
 src/price_data.py | 106 +++++++++++++++++++++++-----------------------
 2 files changed, 92 insertions(+), 89 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index fe80af69..7e49b596 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -5,17 +5,17 @@
 
 class PricePath:
 
-    def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None):
+    def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None):
         if not gdict:
             gdict = {}
         if not cache:
             cache = {}
         if not exchanges:
-            exchanges = ["binance","coinbasepro"]
+            exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority={}
-        allpairs=[]
+        self.priority = {}
+        allpairs = []
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -38,9 +38,9 @@ def __init__(self, exchanges:list=None,gdict:dict=None, cache:dict=None):
             self.addVertex(base)
             self.addVertex(quote)
             self.addEdge(base, quote, {
-                    "exchange": i[2], "symbol": i[3], "inverted": False})
+                "exchange": i[2], "symbol": i[3], "inverted": False})
             self.addEdge(quote, base, {
-                    "exchange": i[2], "symbol": i[3], "inverted": True})
+                "exchange": i[2], "symbol": i[3], "inverted": True})
 
     def edges(self):
         return self.findedges()
@@ -59,8 +59,8 @@ def getVertices(self):
 
 # Add the vertex as a key
     def addVertex(self, vrtx):
-       if vrtx not in self.gdict:
-           self.gdict[vrtx] = []
+        if vrtx not in self.gdict:
+            self.gdict[vrtx] = []
 
     def addEdge(self, vrtx1, vrtx2, data):
         if vrtx1 in self.gdict:
@@ -78,7 +78,7 @@ def _getpath(self, start, stop, maxdepth, depth=0):
                     paths.append(edge)
                 else:
                     path = self._getpath(
-                        edge[0], stop, maxdepth, depth=depth+1)
+                        edge[0], stop, maxdepth, depth=depth + 1)
                     if len(path) and path is not None:
                         for p in path:
                             if p[0] == stop:
@@ -87,34 +87,34 @@ def _getpath(self, start, stop, maxdepth, depth=0):
                                 paths.append(newpath)
         return paths
 
-    def change_prio(self,key,value):
-        ke="-".join(key)
+    def change_prio(self, key, value):
+        ke = "-".join(key)
         if self.priority.get(ke):
-            self.priority[ke]+=value
+            self.priority[ke] += value
         else:
-            self.priority[ke]=value
+            self.priority[ke] = value
 
     def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3):
         def comb_sort_key(path):
             if preferredexchange:
                 # prioritze pairs with the preferred exchange
-                volume=1
-                volumenew=0
-                if not (priority:=self.priority.get("-".join([ a[1]["symbol"] for a in  path]))):
-                    priority=0
+                volume = 1
+                volumenew = 0
+                if not (priority := self.priority.get("-".join([a[1]["symbol"] for a in path]))):
+                    priority = 0
                 for c in [a if (a := check_cache(pair)) else None for pair in path]:
                     if c and c[0]:
-                        if c[1][1]["stoptime"]==0:
+                        if c[1][1]["stoptime"] == 0:
                             break
-                        elif c[1][1]["avg_vol"]!=0:
-                            volumenew+=c[1][1]["avg_vol"] #is very much off because volume is not in the same currency something for later
-
+                        elif c[1][1]["avg_vol"] != 0:
+                            # is very much off because volume is not in the same currency something for later
+                            volumenew += c[1][1]["avg_vol"]
 
                     else:
                         break
                 else:
-                    volume=1/volumenew
-                return len(path)+sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path])+volume+priority
+                    volume = 1 / volumenew
+                return len(path) + sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + volume + priority
             else:
                 return len(path)
 
@@ -122,7 +122,7 @@ def check_cache(pair):
 
             if pair[1].get("starttime") or pair[1].get("stoptime"):
                 return True, pair
-            if cacheres := self.cache.get(pair[1]["exchange"]+pair[1]["symbol"]):
+            if cacheres := self.cache.get(pair[1]["exchange"] + pair[1]["symbol"]):
                 pair[1]["starttime"] = cacheres[0]
                 pair[1]["stoptime"] = cacheres[1]
                 pair[1]["avg_vol"] = cacheres[2]
@@ -133,19 +133,19 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             rangeinms = 0
             timeframe = int(6.048e+8)  # week in ms
             if starttimestamp == 0:
-                starttimestamp = 1325372400*1000
+                starttimestamp = 1325372400 * 1000
             if stoptimestamp == -1:
                 stoptimestamp = time_ns() // 1_000_000  # get cur time in ms
             starttimestamp -= timeframe  # to handle edge cases
             if stoptimestamp > starttimestamp:
-                rangeinms = stoptimestamp-starttimestamp
+                rangeinms = stoptimestamp - starttimestamp
             else:
                 rangeinms = 0  # maybe throw error
 
             # add one candle to the end to ensure the needed timeslot is in the requested candles
-            rangeincandles = int(rangeinms/timeframe)+1
+            rangeincandles = int(rangeinms / timeframe) + 1
 
-            #todo: cache already used pairs
+            # todo: cache already used pairs
             globalstarttime = 0
             globalstoptime = 0
             for i in range(len(path)):
@@ -153,7 +153,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                 if not cached:
                     exchange_class = getattr(ccxt, path[i][1]["exchange"])
                     exchange = exchange_class()
-                    sleep(exchange.rateLimit / 1000) #maybe a more elaborate ratelimit wich counts execution time to waiting
+                    # maybe a more elaborate ratelimit wich counts execution time to waiting
+                    sleep(exchange.rateLimit / 1000)
                     timeframeexchange = exchange.timeframes.get("1w")
                     if timeframeexchange:  # this must be handled better maybe choose timeframe dynamically
                         # maybe cache this per pair
@@ -163,9 +164,9 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                         ohlcv = []  # do not check fail later
                     if len(ohlcv) > 1:
                         # (candle ends after the date + timeframe)
-                        path[i][1]["stoptime"] = ohlcv[-1][0]+timeframe
+                        path[i][1]["stoptime"] = ohlcv[-1][0] + timeframe
                         path[i][1]["avg_vol"] = sum(
-                            [vol[-1] for vol in ohlcv])/len(ohlcv)  # avg vol in curr
+                            [vol[-1] for vol in ohlcv]) / len(ohlcv)  # avg vol in curr
                         path[i][1]["starttime"] = ohlcv[0][0]
                         if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0:
                             globalstoptime = path[i][1]["stoptime"]
@@ -175,13 +176,13 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                         path[i][1]["stoptime"] = 0
                         path[i][1]["starttime"] = 0
                         path[i][1]["avg_vol"] = 0
-                    self.cache[path[i][1]["exchange"]+path[i][1]["symbol"]] = (
+                    self.cache[path[i][1]["exchange"] + path[i][1]["symbol"]] = (
                         path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"])
                 else:
 
-                    if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"]!=0:
+                    if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"] != 0:
                         globalstoptime = path[i][1]["stoptime"]
-                    if path[i][1]["starttime"] > globalstarttime :
+                    if path[i][1]["starttime"] > globalstarttime:
                         globalstarttime = path[i][1]["starttime"]
                     ohlcv = []
             return (globalstarttime, globalstoptime), path
@@ -190,7 +191,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
         paths = self._getpath(start, stop, maxdepth)
         # sort by path length to get minimal conversion chain to reduce error
         paths = sorted(paths, key=comb_sort_key)
-        #get timeframe in which a path is viable 
+        # get timeframe in which a path is viable
         for path in paths:
             timest, newpath = get_active_timeframe(path)
             # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. if the first
@@ -202,7 +203,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             elif stoptime == 0:
                 if starttime > timest[0]:
                     yield timest, newpath
-            
+
             else:
                 if stoptime < timest[1] and starttime > timest[0]:
                     yield timest, newpath
@@ -216,6 +217,6 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
     preferredexchange = "binance"
     path = g.getpath(start, to, maxdepth=2,
                      preferredexchange=preferredexchange)
-    #debug only in actual use we would iterate over the path object fetching new paths as needed
+    # debug only in actual use we would iterate over the path object fetching new paths as needed
     path = list(path)
     print(len(path))
diff --git a/src/price_data.py b/src/price_data.py
index f8b4d9f6..8c36ab69 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -45,7 +45,7 @@
 
 class PriceData:
     def __init__(self):
-        self.path=PricePath()
+        self.path = PricePath()
 
     def get_db_path(self, platform: str) -> Path:
         return Path(config.DATA_PATH, f"{platform}.db")
@@ -420,124 +420,126 @@ def get_cost(
         if isinstance(tr, transaction.SoldCoin):
             return price * tr.sold
         raise NotImplementedError
-    
-    def get_candles(self, start: int, stop: int, symbol: str,exchange: str) ->list:
+
+    def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list:
         exchange_class = getattr(ccxt, exchange)
         exchange = exchange_class()
         if exchange.has['fetchOHLCV']:
             sleep(exchange.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
-            startval=start-1000*60*2
-            rang=max(int((stop-start)/1000/60)+2, 1)
-            return exchange.fetch_ohlcv(symbol, '1m', startval, rang )
+            startval = start - 1000 * 60 * 2
+            rang = max(int((stop - start) / 1000 / 60) + 2, 1)
+            return exchange.fetch_ohlcv(symbol, '1m', startval, rang)
         else:
             log.error(
                 "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv")
             return None
 
-    def _get_bulk_pair_data_path(self, operations: list, coin: str,reference_coin: str,preferredexchange:str="binance") ->list:
-        def merge_prices(a:list,b:list=None):
-            prices=[]
-            if not b :
+    def _get_bulk_pair_data_path(self, operations: list, coin: str, reference_coin: str, preferredexchange: str = "binance") -> list:
+        def merge_prices(a: list, b: list = None):
+            prices = []
+            if not b:
                 return a
             for i in a:
-                factor=None
+                factor = None
                 for j in b:
-                    if i[0]==j[0]:
-                        factor=j[1]
+                    if i[0] == j[0]:
+                        factor = j[1]
                         break
-                prices.append((i[0],i[1]*factor))
+                prices.append((i[0], i[1] * factor))
             return prices
 
         timestamps = []
         timestamppairs = []
-        maxminutes=300 #coinbasepro only allows a max of 300 minutes need a better solution
+        maxminutes = 300  # coinbasepro only allows a max of 300 minutes need a better solution
         timestamps = (op.utc_time for op in operations)
         if not preferredexchange:
-            preferredexchange="binance"
+            preferredexchange = "binance"
 
         current_first = None
         for timestamp in timestamps:
-            if current_first and current_first+datetime.timedelta(minutes=maxminutes-4) > timestamp:
+            if current_first and current_first + datetime.timedelta(minutes=maxminutes - 4) > timestamp:
                 timestamppairs[-1].append(timestamp)
             else:
                 current_first = timestamp
                 timestamppairs.append([timestamp])
-        datacomb=[]
+        datacomb = []
         for batch in timestamppairs:
             # ccxt works with timestamps in milliseconds
             first = misc.to_ms_timestamp(batch[0])
             last = misc.to_ms_timestamp(batch[-1])
-            firststr=batch[0].strftime('%d-%b-%Y (%H:%M)')
-            laststr=batch[-1].strftime('%d-%b-%Y (%H:%M)')
-            log.info(f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}")
-            path=self.path.getpath(coin,reference_coin,first,last,preferredexchange=preferredexchange)
+            firststr = batch[0].strftime('%d-%b-%Y (%H:%M)')
+            laststr = batch[-1].strftime('%d-%b-%Y (%H:%M)')
+            log.info(
+                f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}")
+            path = self.path.getpath(coin, reference_coin, first,
+                                     last, preferredexchange=preferredexchange)
             for p in path:
-                tempdatalis=[]
-                printstr=[ a[1]["symbol"] for a in  p[1] ]
+                tempdatalis = []
+                printstr = [a[1]["symbol"] for a in p[1]]
                 log.debug(f"found path over {' -> '.join(printstr)}")
                 for i in range(len(p[1])):
                     tempdatalis.append([])
-                    symbol=p[1][i][1]["symbol"]
-                    exchange=p[1][i][1]["exchange"]
-                    invert=p[1][i][1]["inverted"]
-                    candles=self.get_candles(first, last, symbol,exchange)
+                    symbol = p[1][i][1]["symbol"]
+                    exchange = p[1][i][1]["exchange"]
+                    invert = p[1][i][1]["inverted"]
+                    candles = self.get_candles(first, last, symbol, exchange)
                     if invert:
                         tempdata = list(
-                            map(lambda x: (x[0], 1/((x[1]+x[4])/2)), candles))
+                            map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles))
                     else:
                         tempdata = list(
-                            map(lambda x: (x[0], (x[1]+x[4])/2), candles))
+                            map(lambda x: (x[0], (x[1] + x[4]) / 2), candles))
 
                     if tempdata:
                         for operation in batch:
                             # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
                             # times do not always line up perfectly so take one nearest
                             ts = list(
-                                map(lambda x: (abs(misc.to_ms_timestamp(operation)*1000-x[0]), x), tempdata))
-                            tempdatalis[i].append((operation, min(ts, key=lambda x: x[0])[1][1]))
+                                map(lambda x: (abs(misc.to_ms_timestamp(operation) * 1000 - x[0]), x), tempdata))
+                            tempdatalis[i].append(
+                                (operation, min(ts, key=lambda x: x[0])[1][1]))
                     else:
-                        tempdatalis=[]
-                        self.path.change_prio(printstr,0.2) # do not try already failed again
+                        tempdatalis = []
+                        # do not try already failed again
+                        self.path.change_prio(printstr, 0.2)
                         break
                 if tempdatalis:
-                    wantedlen=len(tempdatalis[0])
+                    wantedlen = len(tempdatalis[0])
                     for li in tempdatalis:
-                        if not len(li)==wantedlen:
-                            self.path.change_prio(printstr,0.2)
+                        if not len(li) == wantedlen:
+                            self.path.change_prio(printstr, 0.2)
                             break
                     else:
-                        prices=[]
+                        prices = []
                         for d in tempdatalis:
-                            prices=merge_prices(d,prices)
+                            prices = merge_prices(d, prices)
                         datacomb.extend(prices)
                         break
                 log.debug("path failed trying new path")
-     
+
         return datacomb
 
-    def preload_price_data_path(self,operations: list, coin: str,exchange:str=None):
-        
-            
+    def preload_price_data_path(self, operations: list, coin: str, exchange: str = None):
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price
         operations_filtered = []
-        
+
         tablename = self.get_tablename(coin, reference_coin)
-        operations_filtered = [op for op in operations if not self.__get_price_db(self.get_db_path(op.platform), tablename, op.utc_time)]
-        operations_grouped={}
+        operations_filtered = [op for op in operations if not self.__get_price_db(
+            self.get_db_path(op.platform), tablename, op.utc_time)]
+        operations_grouped = {}
         if operations_filtered:
             for i in operations_filtered:
-                if i.coin==config.FIAT:
+                if i.coin == config.FIAT:
                     pass
                 elif operations_grouped.get(i.platform):
                     operations_grouped[i.platform].append(i)
                 else:
-                    operations_grouped[i.platform]=[i]
+                    operations_grouped[i.platform] = [i]
             for platf in operations_grouped.keys():
-                data=self._get_bulk_pair_data_path(operations_grouped[platf],coin,reference_coin,preferredexchange=platf)
+                data = self._get_bulk_pair_data_path(
+                    operations_grouped[platf], coin, reference_coin, preferredexchange=platf)
                 for p in data:
-                    self.set_price_db(platf,coin,reference_coin, p[0], p[1])
-
-
+                    self.set_price_db(platf, coin, reference_coin, p[0], p[1])

From e1f4581162c6c02263bed0dc2c333e648ed29b83 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 12:19:46 +0200
Subject: [PATCH 12/53] formatting

---
 src/graph.py      | 95 ++++++++++++++++++++++++++++++++---------------
 src/price_data.py | 81 +++++++++++++++++++++++++++++-----------
 src/taxman.py     |  2 +-
 3 files changed, 127 insertions(+), 51 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 7e49b596..f9d98271 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,10 +1,10 @@
-import ccxt
 from datetime import datetime
 from time import sleep, time_ns
 
+import ccxt
 
-class PricePath:
 
+class PricePath:
     def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None):
         if not gdict:
             gdict = {}
@@ -22,29 +22,34 @@ def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = Non
             exchange = exchange_class()
             markets = []
             markets = exchange.fetch_markets()
-            if exchange.has['fetchOHLCV']:
+            if exchange.has["fetchOHLCV"]:
 
                 allpairs.extend(
-                    [(i["base"], i["quote"], exchange_id, i["symbol"])for i in markets])
+                    [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets]
+                )
             else:
                 print(
-                    f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs.")
+                    f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs."
+                )
         allpairs = list(set(allpairs))
-        #print("Total Pairs to check:", len(allpairs))
+        # print("Total Pairs to check:", len(allpairs))
         allpairs.sort(key=lambda x: x[3])
         for i in allpairs:
             base = i[0]
             quote = i[1]
             self.addVertex(base)
             self.addVertex(quote)
-            self.addEdge(base, quote, {
-                "exchange": i[2], "symbol": i[3], "inverted": False})
-            self.addEdge(quote, base, {
-                "exchange": i[2], "symbol": i[3], "inverted": True})
+            self.addEdge(
+                base, quote, {"exchange": i[2], "symbol": i[3], "inverted": False}
+            )
+            self.addEdge(
+                quote, base, {"exchange": i[2], "symbol": i[3], "inverted": True}
+            )
 
     def edges(self):
         return self.findedges()
-# Find the distinct list of edges
+
+    # Find the distinct list of edges
 
     def findedges(self):
         edgename = []
@@ -57,7 +62,7 @@ def findedges(self):
     def getVertices(self):
         return list(self.gdict.keys())
 
-# Add the vertex as a key
+    # Add the vertex as a key
     def addVertex(self, vrtx):
         if vrtx not in self.gdict:
             self.gdict[vrtx] = []
@@ -73,16 +78,21 @@ def _getpath(self, start, stop, maxdepth, depth=0):
         if (edges := self.gdict.get(start)) and maxdepth > depth:
             for edge in edges:
                 if depth == 0 and edge[0] == stop:
-                    paths.append([edge, ])
+                    paths.append(
+                        [
+                            edge,
+                        ]
+                    )
                 elif edge[0] == stop:
                     paths.append(edge)
                 else:
-                    path = self._getpath(
-                        edge[0], stop, maxdepth, depth=depth + 1)
+                    path = self._getpath(edge[0], stop, maxdepth, depth=depth + 1)
                     if len(path) and path is not None:
                         for p in path:
                             if p[0] == stop:
-                                newpath = [edge, ]
+                                newpath = [
+                                    edge,
+                                ]
                                 newpath.append(p)
                                 paths.append(newpath)
         return paths
@@ -94,13 +104,19 @@ def change_prio(self, key, value):
         else:
             self.priority[ke] = value
 
-    def getpath(self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3):
+    def getpath(
+        self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3
+    ):
         def comb_sort_key(path):
             if preferredexchange:
                 # prioritze pairs with the preferred exchange
                 volume = 1
                 volumenew = 0
-                if not (priority := self.priority.get("-".join([a[1]["symbol"] for a in path]))):
+                if not (
+                    priority := self.priority.get(
+                        "-".join([a[1]["symbol"] for a in path])
+                    )
+                ):
                     priority = 0
                 for c in [a if (a := check_cache(pair)) else None for pair in path]:
                     if c and c[0]:
@@ -114,7 +130,17 @@ def comb_sort_key(path):
                         break
                 else:
                     volume = 1 / volumenew
-                return len(path) + sum([0 if pair[1]["exchange"] == preferredexchange else 1 for pair in path]) + volume + priority
+                return (
+                    len(path)
+                    + sum(
+                        [
+                            0 if pair[1]["exchange"] == preferredexchange else 1
+                            for pair in path
+                        ]
+                    )
+                    + volume
+                    + priority
+                )
             else:
                 return len(path)
 
@@ -131,7 +157,7 @@ def check_cache(pair):
 
         def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             rangeinms = 0
-            timeframe = int(6.048e+8)  # week in ms
+            timeframe = int(6.048e8)  # week in ms
             if starttimestamp == 0:
                 starttimestamp = 1325372400 * 1000
             if stoptimestamp == -1:
@@ -156,19 +182,26 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                     # maybe a more elaborate ratelimit wich counts execution time to waiting
                     sleep(exchange.rateLimit / 1000)
                     timeframeexchange = exchange.timeframes.get("1w")
-                    if timeframeexchange:  # this must be handled better maybe choose timeframe dynamically
+                    if (
+                        timeframeexchange
+                    ):  # this must be handled better maybe choose timeframe dynamically
                         # maybe cache this per pair
                         ohlcv = exchange.fetch_ohlcv(
-                            path[i][1]["symbol"], "1w", starttimestamp, rangeincandles)
+                            path[i][1]["symbol"], "1w", starttimestamp, rangeincandles
+                        )
                     else:
                         ohlcv = []  # do not check fail later
                     if len(ohlcv) > 1:
                         # (candle ends after the date + timeframe)
                         path[i][1]["stoptime"] = ohlcv[-1][0] + timeframe
-                        path[i][1]["avg_vol"] = sum(
-                            [vol[-1] for vol in ohlcv]) / len(ohlcv)  # avg vol in curr
+                        path[i][1]["avg_vol"] = sum([vol[-1] for vol in ohlcv]) / len(
+                            ohlcv
+                        )  # avg vol in curr
                         path[i][1]["starttime"] = ohlcv[0][0]
-                        if path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0:
+                        if (
+                            path[i][1]["stoptime"] < globalstoptime
+                            or globalstoptime == 0
+                        ):
                             globalstoptime = path[i][1]["stoptime"]
                         if path[i][1]["starttime"] > globalstarttime:
                             globalstarttime = path[i][1]["starttime"]
@@ -177,10 +210,15 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                         path[i][1]["starttime"] = 0
                         path[i][1]["avg_vol"] = 0
                     self.cache[path[i][1]["exchange"] + path[i][1]["symbol"]] = (
-                        path[i][1]["starttime"], path[i][1]["stoptime"], path[i][1]["avg_vol"])
+                        path[i][1]["starttime"],
+                        path[i][1]["stoptime"],
+                        path[i][1]["avg_vol"],
+                    )
                 else:
 
-                    if (path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0) and path[i][1]["stoptime"] != 0:
+                    if (
+                        path[i][1]["stoptime"] < globalstoptime or globalstoptime == 0
+                    ) and path[i][1]["stoptime"] != 0:
                         globalstoptime = path[i][1]["stoptime"]
                     if path[i][1]["starttime"] > globalstarttime:
                         globalstarttime = path[i][1]["starttime"]
@@ -215,8 +253,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
     start = "IOTA"
     to = "EUR"
     preferredexchange = "binance"
-    path = g.getpath(start, to, maxdepth=2,
-                     preferredexchange=preferredexchange)
+    path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange)
     # debug only in actual use we would iterate over the path object fetching new paths as needed
     path = list(path)
     print(len(path))
diff --git a/src/price_data.py b/src/price_data.py
index 8c36ab69..3cd3d8eb 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -22,11 +22,11 @@
 import sqlite3
 import time
 from pathlib import Path
-from typing import Any, Optional, Union
 from time import sleep
+from typing import Any, Optional, Union
 
-import requests
 import ccxt
+import requests
 
 import config
 import misc
@@ -424,18 +424,25 @@ def get_cost(
     def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list:
         exchange_class = getattr(ccxt, exchange)
         exchange = exchange_class()
-        if exchange.has['fetchOHLCV']:
+        if exchange.has["fetchOHLCV"]:
             sleep(exchange.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
             startval = start - 1000 * 60 * 2
             rang = max(int((stop - start) / 1000 / 60) + 2, 1)
-            return exchange.fetch_ohlcv(symbol, '1m', startval, rang)
+            return exchange.fetch_ohlcv(symbol, "1m", startval, rang)
         else:
             log.error(
-                "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv")
+                "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv"
+            )
             return None
 
-    def _get_bulk_pair_data_path(self, operations: list, coin: str, reference_coin: str, preferredexchange: str = "binance") -> list:
+    def _get_bulk_pair_data_path(
+        self,
+        operations: list,
+        coin: str,
+        reference_coin: str,
+        preferredexchange: str = "binance",
+    ) -> list:
         def merge_prices(a: list, b: list = None):
             prices = []
             if not b:
@@ -451,14 +458,20 @@ def merge_prices(a: list, b: list = None):
 
         timestamps = []
         timestamppairs = []
-        maxminutes = 300  # coinbasepro only allows a max of 300 minutes need a better solution
+        maxminutes = (
+            300  # coinbasepro only allows a max of 300 minutes need a better solution
+        )
         timestamps = (op.utc_time for op in operations)
         if not preferredexchange:
             preferredexchange = "binance"
 
         current_first = None
         for timestamp in timestamps:
-            if current_first and current_first + datetime.timedelta(minutes=maxminutes - 4) > timestamp:
+            if (
+                current_first
+                and current_first + datetime.timedelta(minutes=maxminutes - 4)
+                > timestamp
+            ):
                 timestamppairs[-1].append(timestamp)
             else:
                 current_first = timestamp
@@ -468,12 +481,14 @@ def merge_prices(a: list, b: list = None):
             # ccxt works with timestamps in milliseconds
             first = misc.to_ms_timestamp(batch[0])
             last = misc.to_ms_timestamp(batch[-1])
-            firststr = batch[0].strftime('%d-%b-%Y (%H:%M)')
-            laststr = batch[-1].strftime('%d-%b-%Y (%H:%M)')
+            firststr = batch[0].strftime("%d-%b-%Y (%H:%M)")
+            laststr = batch[-1].strftime("%d-%b-%Y (%H:%M)")
             log.info(
-                f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}")
-            path = self.path.getpath(coin, reference_coin, first,
-                                     last, preferredexchange=preferredexchange)
+                f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}"
+            )
+            path = self.path.getpath(
+                coin, reference_coin, first, last, preferredexchange=preferredexchange
+            )
             for p in path:
                 tempdatalis = []
                 printstr = [a[1]["symbol"] for a in p[1]]
@@ -486,19 +501,32 @@ def merge_prices(a: list, b: list = None):
                     candles = self.get_candles(first, last, symbol, exchange)
                     if invert:
                         tempdata = list(
-                            map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles))
+                            map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles)
+                        )
                     else:
                         tempdata = list(
-                            map(lambda x: (x[0], (x[1] + x[4]) / 2), candles))
+                            map(lambda x: (x[0], (x[1] + x[4]) / 2), candles)
+                        )
 
                     if tempdata:
                         for operation in batch:
                             # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
                             # times do not always line up perfectly so take one nearest
                             ts = list(
-                                map(lambda x: (abs(misc.to_ms_timestamp(operation) * 1000 - x[0]), x), tempdata))
+                                map(
+                                    lambda x: (
+                                        abs(
+                                            misc.to_ms_timestamp(operation) * 1000
+                                            - x[0]
+                                        ),
+                                        x,
+                                    ),
+                                    tempdata,
+                                )
+                            )
                             tempdatalis[i].append(
-                                (operation, min(ts, key=lambda x: x[0])[1][1]))
+                                (operation, min(ts, key=lambda x: x[0])[1][1])
+                            )
                     else:
                         tempdatalis = []
                         # do not try already failed again
@@ -520,15 +548,22 @@ def merge_prices(a: list, b: list = None):
 
         return datacomb
 
-    def preload_price_data_path(self, operations: list, coin: str, exchange: str = None):
+    def preload_price_data_path(
+        self, operations: list, coin: str, exchange: str = None
+    ):
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price
         operations_filtered = []
 
         tablename = self.get_tablename(coin, reference_coin)
-        operations_filtered = [op for op in operations if not self.__get_price_db(
-            self.get_db_path(op.platform), tablename, op.utc_time)]
+        operations_filtered = [
+            op
+            for op in operations
+            if not self.__get_price_db(
+                self.get_db_path(op.platform), tablename, op.utc_time
+            )
+        ]
         operations_grouped = {}
         if operations_filtered:
             for i in operations_filtered:
@@ -540,6 +575,10 @@ def preload_price_data_path(self, operations: list, coin: str, exchange: str = N
                     operations_grouped[i.platform] = [i]
             for platf in operations_grouped.keys():
                 data = self._get_bulk_pair_data_path(
-                    operations_grouped[platf], coin, reference_coin, preferredexchange=platf)
+                    operations_grouped[platf],
+                    coin,
+                    reference_coin,
+                    preferredexchange=platf,
+                )
                 for p in data:
                     self.set_price_db(platf, coin, reference_coin, p[0], p[1])
diff --git a/src/taxman.py b/src/taxman.py
index 1e3e7e75..6ec05165 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -180,7 +180,7 @@ def evaluate_taxation(self) -> None:
         log.debug("Starting evaluation...")
         for coin, operations in misc.group_by(self.book.operations, "coin").items():
             operations = sorted(operations, key=lambda op: op.utc_time)
-            self.price_data.preload_price_data_path(operations,coin)
+            self.price_data.preload_price_data_path(operations, coin)
             self.__evaluate_taxation(coin, operations)
 
     def print_evaluation(self) -> None:

From 95deecc41d4dd9807153da2be4ffe8655e889f95 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 12:25:18 +0200
Subject: [PATCH 13/53] fixed some mypy bugs

---
 src/graph.py      | 9 ++-------
 src/price_data.py | 8 ++++----
 2 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index f9d98271..8cc222dc 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -5,16 +5,12 @@
 
 
 class PricePath:
-    def __init__(self, exchanges: list = None, gdict: dict = None, cache: dict = None):
-        if not gdict:
-            gdict = {}
-        if not cache:
-            cache = {}
+    def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
         if not exchanges:
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority = {}
+        self.priority : dict= {}
         allpairs = []
 
         for exchange_id in exchanges:
@@ -249,7 +245,6 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
 
 if __name__ == "__main__":
     g = PricePath()
-    allpairs = []
     start = "IOTA"
     to = "EUR"
     preferredexchange = "binance"
diff --git a/src/price_data.py b/src/price_data.py
index 3cd3d8eb..c1b4711b 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -423,13 +423,13 @@ def get_cost(
 
     def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list:
         exchange_class = getattr(ccxt, exchange)
-        exchange = exchange_class()
-        if exchange.has["fetchOHLCV"]:
-            sleep(exchange.rateLimit / 1000)  # time.sleep wants seconds
+        exchange_obj = exchange_class()
+        if exchange_obj.has["fetchOHLCV"]:
+            sleep(exchange_obj.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
             startval = start - 1000 * 60 * 2
             rang = max(int((stop - start) / 1000 / 60) + 2, 1)
-            return exchange.fetch_ohlcv(symbol, "1m", startval, rang)
+            return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang))
         else:
             log.error(
                 "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv"

From 7976ee651036de18c7a14ef605d7bfedbd5a45af Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 13:31:58 +0200
Subject: [PATCH 14/53] black formatting

---
 src/graph.py  | 2 +-
 src/taxman.py | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 8cc222dc..23f93db2 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -10,7 +10,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority : dict= {}
+        self.priority: dict = {}
         allpairs = []
 
         for exchange_id in exchanges:
diff --git a/src/taxman.py b/src/taxman.py
index 6ec05165..1df6a84e 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -45,8 +45,6 @@ def __init__(self, book: Book, price_data: PriceData) -> None:
             self.__evaluate_taxation = getattr(self, f"_evaluate_taxation_{country}")
         except AttributeError:
             raise NotImplementedError(f"Unable to evaluate taxation for {country=}.")
-        
-        
 
         if config.PRINCIPLE == core.Principle.FIFO:
             self.BalanceType = balance_queue.BalanceQueue

From a0c8df5884789d258810165c91237f6f05559f6d Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 13:58:47 +0200
Subject: [PATCH 15/53] fix mypy

---
 src/graph.py      |  6 +++---
 src/price_data.py | 24 ++++++++++++------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 23f93db2..70a75946 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 from time import sleep, time_ns
 
-import ccxt
+import ccxt #type: ignore
 
 
 class PricePath:
@@ -10,7 +10,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority: dict = {}
+        self.priority : dict= {}
         allpairs = []
 
         for exchange_id in exchanges:
@@ -29,7 +29,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
                 )
         allpairs = list(set(allpairs))
         # print("Total Pairs to check:", len(allpairs))
-        allpairs.sort(key=lambda x: x[3])
+        allpairs.sort(key=lambda x: x[3]) #type: ignore
         for i in allpairs:
             base = i[0]
             quote = i[1]
diff --git a/src/price_data.py b/src/price_data.py
index c1b4711b..0b8dffd5 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -25,7 +25,7 @@
 from time import sleep
 from typing import Any, Optional, Union
 
-import ccxt
+import ccxt #type: ignore
 import requests
 
 import config
@@ -434,7 +434,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list
             log.error(
                 "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv"
             )
-            return None
+            return []
 
     def _get_bulk_pair_data_path(
         self,
@@ -443,7 +443,7 @@ def _get_bulk_pair_data_path(
         reference_coin: str,
         preferredexchange: str = "binance",
     ) -> list:
-        def merge_prices(a: list, b: list = None):
+        def merge_prices(a: list, b: list = []) -> list: 
             prices = []
             if not b:
                 return a
@@ -456,12 +456,12 @@ def merge_prices(a: list, b: list = None):
                 prices.append((i[0], i[1] * factor))
             return prices
 
-        timestamps = []
-        timestamppairs = []
+        timestamps: list = []
+        timestamppairs: list = []
         maxminutes = (
             300  # coinbasepro only allows a max of 300 minutes need a better solution
         )
-        timestamps = (op.utc_time for op in operations)
+        timestamps = (op.utc_time for op in operations) # type: ignore
         if not preferredexchange:
             preferredexchange = "binance"
 
@@ -490,7 +490,7 @@ def merge_prices(a: list, b: list = None):
                 coin, reference_coin, first, last, preferredexchange=preferredexchange
             )
             for p in path:
-                tempdatalis = []
+                tempdatalis: list = []
                 printstr = [a[1]["symbol"] for a in p[1]]
                 log.debug(f"found path over {' -> '.join(printstr)}")
                 for i in range(len(p[1])):
@@ -525,7 +525,7 @@ def merge_prices(a: list, b: list = None):
                                 )
                             )
                             tempdatalis[i].append(
-                                (operation, min(ts, key=lambda x: x[0])[1][1])
+                                (operation, min(ts, key=lambda x: x[0])[1][1]) # type: ignore
                             )
                     else:
                         tempdatalis = []
@@ -539,7 +539,7 @@ def merge_prices(a: list, b: list = None):
                             self.path.change_prio(printstr, 0.2)
                             break
                     else:
-                        prices = []
+                        prices: list = []
                         for d in tempdatalis:
                             prices = merge_prices(d, prices)
                         datacomb.extend(prices)
@@ -549,8 +549,8 @@ def merge_prices(a: list, b: list = None):
         return datacomb
 
     def preload_price_data_path(
-        self, operations: list, coin: str, exchange: str = None
-    ):
+        self, operations: list, coin: str, exchange: str = ""
+    ) -> None:
 
         reference_coin = config.FIAT
         # get pairs used for calculating the price
@@ -564,7 +564,7 @@ def preload_price_data_path(
                 self.get_db_path(op.platform), tablename, op.utc_time
             )
         ]
-        operations_grouped = {}
+        operations_grouped:dict = {}
         if operations_filtered:
             for i in operations_filtered:
                 if i.coin == config.FIAT:

From 08f34020538a8b447d2334976aa604e8f4abe81e Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 17:22:46 +0200
Subject: [PATCH 16/53] flake8-compatible formatting (except E501)

---
 src/graph.py      | 22 +++++++++++++---------
 src/price_data.py | 17 ++++++++++-------
 2 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 70a75946..ba45674c 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,7 +1,6 @@
-from datetime import datetime
 from time import sleep, time_ns
 
-import ccxt #type: ignore
+import ccxt  # type: ignore
 
 
 class PricePath:
@@ -10,7 +9,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority : dict= {}
+        self.priority: dict = {}
         allpairs = []
 
         for exchange_id in exchanges:
@@ -29,7 +28,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
                 )
         allpairs = list(set(allpairs))
         # print("Total Pairs to check:", len(allpairs))
-        allpairs.sort(key=lambda x: x[3]) #type: ignore
+        allpairs.sort(key=lambda x: x[3])
         for i in allpairs:
             base = i[0]
             quote = i[1]
@@ -119,7 +118,8 @@ def comb_sort_key(path):
                         if c[1][1]["stoptime"] == 0:
                             break
                         elif c[1][1]["avg_vol"] != 0:
-                            # is very much off because volume is not in the same currency something for later
+                            # is very much off because volume is not in the same
+                            # currency something for later
                             volumenew += c[1][1]["avg_vol"]
 
                     else:
@@ -164,7 +164,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             else:
                 rangeinms = 0  # maybe throw error
 
-            # add one candle to the end to ensure the needed timeslot is in the requested candles
+            # add one candle to the end to ensure the needed
+            # timeslot is in the requested candles
             rangeincandles = int(rangeinms / timeframe) + 1
 
             # todo: cache already used pairs
@@ -175,7 +176,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                 if not cached:
                     exchange_class = getattr(ccxt, path[i][1]["exchange"])
                     exchange = exchange_class()
-                    # maybe a more elaborate ratelimit wich counts execution time to waiting
+                    # TODO maybe a more elaborate ratelimit wich removes execution
+                    # time to from the ratelimit
                     sleep(exchange.rateLimit / 1000)
                     timeframeexchange = exchange.timeframes.get("1w")
                     if (
@@ -228,7 +230,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
         # get timeframe in which a path is viable
         for path in paths:
             timest, newpath = get_active_timeframe(path)
-            # this is implemented as a generator (hence the yield) to reduce the amount of computing needed. if the first
+            # this is implemented as a generator (hence the yield) to reduce
+            # the amount of computing needed. if the first path fails the next is used
             if starttime == 0 and stoptime == 0:
                 yield timest, newpath
             elif starttime == 0:
@@ -249,6 +252,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
     to = "EUR"
     preferredexchange = "binance"
     path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange)
-    # debug only in actual use we would iterate over the path object fetching new paths as needed
+    # debug only in actual use we would iterate over
+    # the path object fetching new paths as needed
     path = list(path)
     print(len(path))
diff --git a/src/price_data.py b/src/price_data.py
index 0b8dffd5..e65cc3dc 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -25,7 +25,7 @@
 from time import sleep
 from typing import Any, Optional, Union
 
-import ccxt #type: ignore
+import ccxt  # type: ignore
 import requests
 
 import config
@@ -432,8 +432,9 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list
             return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang))
         else:
             log.error(
-                "fetchOHLCV not implemented on exchange, skipping priceloading using ohlcv"
+                "fetchOHLCV not implemented on exchange, skipping ohlcv"
             )
+            # shouldnt happen technically because exchanges are filterd for fetchohlcv
             return []
 
     def _get_bulk_pair_data_path(
@@ -443,7 +444,7 @@ def _get_bulk_pair_data_path(
         reference_coin: str,
         preferredexchange: str = "binance",
     ) -> list:
-        def merge_prices(a: list, b: list = []) -> list: 
+        def merge_prices(a: list, b: list = []) -> list:
             prices = []
             if not b:
                 return a
@@ -461,7 +462,7 @@ def merge_prices(a: list, b: list = []) -> list:
         maxminutes = (
             300  # coinbasepro only allows a max of 300 minutes need a better solution
         )
-        timestamps = (op.utc_time for op in operations) # type: ignore
+        timestamps = [op.utc_time for op in operations]
         if not preferredexchange:
             preferredexchange = "binance"
 
@@ -510,7 +511,9 @@ def merge_prices(a: list, b: list = []) -> list:
 
                     if tempdata:
                         for operation in batch:
-                            # TODO discuss which candle is picked current is closest to original date (often off by about 1-20s, but can be after the Trade)
+                            # TODO discuss which candle is picked
+                            # current is closest to original date
+                            # (often off by about 1-20s, but can be after the Trade)
                             # times do not always line up perfectly so take one nearest
                             ts = list(
                                 map(
@@ -525,7 +528,7 @@ def merge_prices(a: list, b: list = []) -> list:
                                 )
                             )
                             tempdatalis[i].append(
-                                (operation, min(ts, key=lambda x: x[0])[1][1]) # type: ignore
+                                (operation, min(ts, key=lambda x: x[0])[1][1])
                             )
                     else:
                         tempdatalis = []
@@ -564,7 +567,7 @@ def preload_price_data_path(
                 self.get_db_path(op.platform), tablename, op.utc_time
             )
         ]
-        operations_grouped:dict = {}
+        operations_grouped: dict = {}
         if operations_filtered:
             for i in operations_filtered:
                 if i.coin == config.FIAT:

From 23f38b61a69082f3afe03909246d7cc7cd283db5 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 17:38:27 +0200
Subject: [PATCH 17/53] documentation

---
 src/graph.py | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index ba45674c..4430cbe6 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -9,7 +9,8 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority: dict = {}
+        self.priority: dict[str,int] = {} 
+        #saves the priority for a certain path so that bad paths can be skipped
         allpairs = []
 
         for exchange_id in exchanges:
@@ -26,9 +27,10 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
                 print(
                     f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs."
                 )
-        allpairs = list(set(allpairs))
+        allpairs = list(set(allpairs)) # fast an easy deduplication
         # print("Total Pairs to check:", len(allpairs))
-        allpairs.sort(key=lambda x: x[3])
+        allpairs.sort(key=lambda x: x[3]) 
+        #sorting by symbol for pair to have the same result on every run due to the set
         for i in allpairs:
             base = i[0]
             quote = i[1]
@@ -69,6 +71,9 @@ def addEdge(self, vrtx1, vrtx2, data):
             self.gdict[vrtx1] = [vrtx2]
 
     def _getpath(self, start, stop, maxdepth, depth=0):
+        """
+        a recursive function for finding all possible paths between to edges
+        """
         paths = []
         if (edges := self.gdict.get(start)) and maxdepth > depth:
             for edge in edges:
@@ -103,6 +108,14 @@ def getpath(
         self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3
     ):
         def comb_sort_key(path):
+            """
+            Sorting funtction which is used to prioritze paths by (in order of magnitude):
+            - smallest length -> +1 per element
+            - preferred exchange -> +1 per exchange which is not preferred
+            - priority -> +0.5 per unfinished execution of path
+            - volume (if known) -> 1/sum(avg_vol per pair) 
+            - volume (if not known) -> 1 -> always smaller if volume is known 
+            """
             if preferredexchange:
                 # prioritze pairs with the preferred exchange
                 volume = 1
@@ -141,7 +154,10 @@ def comb_sort_key(path):
                 return len(path)
 
         def check_cache(pair):
-
+            """
+            checking if the start and stoptime of a pair is already known
+            or if it needs to be downloaded
+            """
             if pair[1].get("starttime") or pair[1].get("stoptime"):
                 return True, pair
             if cacheres := self.cache.get(pair[1]["exchange"] + pair[1]["symbol"]):
@@ -240,7 +256,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             elif stoptime == 0:
                 if starttime > timest[0]:
                     yield timest, newpath
-
+            # The most ideal situation is if the timerange of the path is known 
+            # and larger than the needed timerange
             else:
                 if stoptime < timest[1] and starttime > timest[0]:
                     yield timest, newpath

From 3939e1599e7d057a777c3082726cf6d6d77e0eb7 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Thu, 8 Apr 2021 18:07:13 +0200
Subject: [PATCH 18/53] small formatting

---
 src/graph.py      | 16 ++++++++--------
 src/price_data.py |  4 +---
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 4430cbe6..c1f3aea3 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -9,8 +9,8 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
             exchanges = ["binance", "coinbasepro"]
         self.gdict = gdict
         self.cache = cache
-        self.priority: dict[str,int] = {} 
-        #saves the priority for a certain path so that bad paths can be skipped
+        self.priority: dict[str, int] = {}
+        # saves the priority for a certain path so that bad paths can be skipped
         allpairs = []
 
         for exchange_id in exchanges:
@@ -27,10 +27,10 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
                 print(
                     f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs."
                 )
-        allpairs = list(set(allpairs)) # fast an easy deduplication
+        allpairs = list(set(allpairs))  # fast an easy deduplication
         # print("Total Pairs to check:", len(allpairs))
-        allpairs.sort(key=lambda x: x[3]) 
-        #sorting by symbol for pair to have the same result on every run due to the set
+        allpairs.sort(key=lambda x: x[3])
+        # sorting by symbol for pair to have the same result on every run due to the set
         for i in allpairs:
             base = i[0]
             quote = i[1]
@@ -113,8 +113,8 @@ def comb_sort_key(path):
             - smallest length -> +1 per element
             - preferred exchange -> +1 per exchange which is not preferred
             - priority -> +0.5 per unfinished execution of path
-            - volume (if known) -> 1/sum(avg_vol per pair) 
-            - volume (if not known) -> 1 -> always smaller if volume is known 
+            - volume (if known) -> 1/sum(avg_vol per pair)
+            - volume (if not known) -> 1 -> always smaller if volume is known
             """
             if preferredexchange:
                 # prioritze pairs with the preferred exchange
@@ -256,7 +256,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             elif stoptime == 0:
                 if starttime > timest[0]:
                     yield timest, newpath
-            # The most ideal situation is if the timerange of the path is known 
+            # The most ideal situation is if the timerange of the path is known
             # and larger than the needed timerange
             else:
                 if stoptime < timest[1] and starttime > timest[0]:
diff --git a/src/price_data.py b/src/price_data.py
index e65cc3dc..dbdf0b39 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -431,9 +431,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list
             rang = max(int((stop - start) / 1000 / 60) + 2, 1)
             return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang))
         else:
-            log.error(
-                "fetchOHLCV not implemented on exchange, skipping ohlcv"
-            )
+            log.error("fetchOHLCV not implemented on exchange, skipping ohlcv")
             # shouldnt happen technically because exchanges are filterd for fetchohlcv
             return []
 

From 65cbfbfb6c7d5c798ffd7f96444dce9aba19945f Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 19:22:59 +0200
Subject: [PATCH 19/53] RM `# type: ignore`

---
 src/graph.py      | 2 +-
 src/price_data.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index c1f3aea3..6215e435 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,6 +1,6 @@
 from time import sleep, time_ns
 
-import ccxt  # type: ignore
+import ccxt
 
 
 class PricePath:
diff --git a/src/price_data.py b/src/price_data.py
index dbdf0b39..ab4ed610 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -25,7 +25,7 @@
 from time import sleep
 from typing import Any, Optional, Union
 
-import ccxt  # type: ignore
+import ccxt
 import requests
 
 import config

From 888dba81cab5d7f2713769dd9205fd7fd6279d68 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 19:40:49 +0200
Subject: [PATCH 20/53] Use logging instead of print

---
 src/graph.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/graph.py b/src/graph.py
index 6215e435..65843fdc 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,7 +1,10 @@
+import logging
 from time import sleep, time_ns
 
 import ccxt
 
+log = logging.getLogger(__name__)
+
 
 class PricePath:
     def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
@@ -24,7 +27,7 @@ def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
                     [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets]
                 )
             else:
-                print(
+                logging.warning(
                     f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs."
                 )
         allpairs = list(set(allpairs))  # fast an easy deduplication

From 5d4f394580607da998a9c66248e5c9d5df12e576 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 19:49:03 +0200
Subject: [PATCH 21/53] Order requirements-dev alphabetically

---
 requirements-dev.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 8a333263..4cae901f 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -4,8 +4,8 @@ click==7.1.2
 flake8==3.8.4
 isort==5.7.0
 mccabe==0.6.1
-mypy-extensions==0.4.3
 mypy==0.812
+mypy-extensions==0.4.3
 pathspec==0.8.1
 pycodestyle==2.6.0
 pyflakes==2.2.0

From be5ab791cd1d5bbef717a0efa90833e8c5228d1f Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 19:49:15 +0200
Subject: [PATCH 22/53] ADD all required modules explicitly

---
 requirements.txt | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 542c7cb2..af2835fa 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,18 @@
+aiodns==2.0.0
+aiohttp==3.7.4.post0
+async-timeout==3.0.1
+attrs==20.3.0
 ccxt==1.42.47
 certifi==2020.12.5
+cffi==1.14.5
 chardet==4.0.0
+cryptography==3.4.7
 idna==2.10
+multidict==5.1.0
+pycares==3.1.1
+pycparser==2.20
 python-dateutil==2.8.1
 requests==2.25.1
 six==1.15.0
 urllib3==1.26.4
+yarl==1.1.0

From 8928d6d694d66ff727b4d2dfe957b07f6759a1b8 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 20:08:01 +0200
Subject: [PATCH 23/53] Use explicit import

---
 src/graph.py      | 6 +++---
 src/price_data.py | 7 +++----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 65843fdc..93874394 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,5 +1,5 @@
 import logging
-from time import sleep, time_ns
+import time
 
 import ccxt
 
@@ -176,7 +176,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             if starttimestamp == 0:
                 starttimestamp = 1325372400 * 1000
             if stoptimestamp == -1:
-                stoptimestamp = time_ns() // 1_000_000  # get cur time in ms
+                stoptimestamp = time.time_ns() // 1_000_000  # get cur time in ms
             starttimestamp -= timeframe  # to handle edge cases
             if stoptimestamp > starttimestamp:
                 rangeinms = stoptimestamp - starttimestamp
@@ -197,7 +197,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                     exchange = exchange_class()
                     # TODO maybe a more elaborate ratelimit wich removes execution
                     # time to from the ratelimit
-                    sleep(exchange.rateLimit / 1000)
+                    time.sleep(exchange.rateLimit / 1000)
                     timeframeexchange = exchange.timeframes.get("1w")
                     if (
                         timeframeexchange
diff --git a/src/price_data.py b/src/price_data.py
index ab4ed610..9f4a430b 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -22,17 +22,16 @@
 import sqlite3
 import time
 from pathlib import Path
-from time import sleep
 from typing import Any, Optional, Union
 
 import ccxt
 import requests
 
 import config
+import graph
 import misc
 import transaction
 from core import kraken_pair_map
-from graph import PricePath
 
 log = logging.getLogger(__name__)
 
@@ -45,7 +44,7 @@
 
 class PriceData:
     def __init__(self):
-        self.path = PricePath()
+        self.path = graph.PricePath()
 
     def get_db_path(self, platform: str) -> Path:
         return Path(config.DATA_PATH, f"{platform}.db")
@@ -425,7 +424,7 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list
         exchange_class = getattr(ccxt, exchange)
         exchange_obj = exchange_class()
         if exchange_obj.has["fetchOHLCV"]:
-            sleep(exchange_obj.rateLimit / 1000)  # time.sleep wants seconds
+            time.sleep(exchange_obj.rateLimit / 1000)  # time.sleep wants seconds
             # get 2min before and after range
             startval = start - 1000 * 60 * 2
             rang = max(int((stop - start) / 1000 / 60) + 2, 1)

From b2f207478b5440768edf9da555f21ff09b9d304d Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Thu, 8 Apr 2021 20:09:15 +0200
Subject: [PATCH 24/53] FIX remove false *1000

---
 src/price_data.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 9f4a430b..26012dff 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -515,10 +515,7 @@ def merge_prices(a: list, b: list = []) -> list:
                             ts = list(
                                 map(
                                     lambda x: (
-                                        abs(
-                                            misc.to_ms_timestamp(operation) * 1000
-                                            - x[0]
-                                        ),
+                                        abs(misc.to_ms_timestamp(operation) - x[0]),
                                         x,
                                     ),
                                     tempdata,

From 700930afeac547ad9a2cbe57c4ff82278c7a4c07 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Fri, 9 Apr 2021 20:06:09 +0200
Subject: [PATCH 25/53] REFACTOR PriceData.get_candles

---
 src/price_data.py | 51 +++++++++++++++++++++++++++++++++++------------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 26012dff..fd78ec0c 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -19,6 +19,7 @@
 import decimal
 import json
 import logging
+import math
 import sqlite3
 import time
 from pathlib import Path
@@ -420,19 +421,43 @@ def get_cost(
             return price * tr.sold
         raise NotImplementedError
 
-    def get_candles(self, start: int, stop: int, symbol: str, exchange: str) -> list:
-        exchange_class = getattr(ccxt, exchange)
-        exchange_obj = exchange_class()
-        if exchange_obj.has["fetchOHLCV"]:
-            time.sleep(exchange_obj.rateLimit / 1000)  # time.sleep wants seconds
-            # get 2min before and after range
-            startval = start - 1000 * 60 * 2
-            rang = max(int((stop - start) / 1000 / 60) + 2, 1)
-            return list(exchange_obj.fetch_ohlcv(symbol, "1m", startval, rang))
-        else:
-            log.error("fetchOHLCV not implemented on exchange, skipping ohlcv")
-            # shouldnt happen technically because exchanges are filterd for fetchohlcv
-            return []
+    def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> list:
+        """Return list with candles starting 2 minutes before start.
+
+        Args:
+            start (int): Start time in milliseconds since epoch.
+            stop (int): End time in milliseconds.
+            symbol (str)
+            exchange_id (str)
+
+        Returns:
+            list: List of OHLCV candles gathered from ccxt.
+        """
+        assert stop >= start, f"`stop` must be after `start` {stop} !>= {start}."
+
+        exchange_class = getattr(ccxt, exchange_id)
+        exchange = exchange_class()
+        assert isinstance(exchange, ccxt.Exchange)
+
+        # Technically impossible. Unsupported exchanges should be detected earlier.
+        assert exchange.has["fetchOHLCV"]
+
+        # time.sleep wants seconds
+        time.sleep(exchange.rateLimit / 1000)
+
+        # Get candles 2 min before and after start/stop.
+        since = start - 2 * 60 * 1000
+        # `fetch_ohlcv` has no stop value but only a limit (amount of candles fetched).
+        # Calculate the amount of candles in the 1 min timeframe,
+        # so that we get enough candles.
+        # BUG Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300).
+        #     We should throw a warning and make sure that `limit` is below their
+        #     supported maximum.
+        limit = math.ceil((stop - start) / (1000 * 60)) + 2
+
+        candles = exchange.fetch_ohlcv(symbol, "1m", since, limit)
+        assert isinstance(candles, list)
+        return candles
 
     def _get_bulk_pair_data_path(
         self,

From 3c036bb93d114a3c86b378530bdbe4f015a257a0 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Fri, 9 Apr 2021 21:05:18 +0200
Subject: [PATCH 26/53] ADD flake8-bugbear to show additional warnings

---
 requirements-dev.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements-dev.txt b/requirements-dev.txt
index 4cae901f..15fe5aa7 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -2,6 +2,7 @@ appdirs==1.4.4
 black==20.8b1
 click==7.1.2
 flake8==3.8.4
+flake8-bugbear==21.4.3
 isort==5.7.0
 mccabe==0.6.1
 mypy==0.812

From 644c2a989229c90143bf7aa14466a8f15f687591 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 10:50:03 +0200
Subject: [PATCH 27/53] Adjust bug message, ccxt raises error when ohlc limit
 is exceeded

---
 src/price_data.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index fd78ec0c..60873df6 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -450,9 +450,8 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> l
         # `fetch_ohlcv` has no stop value but only a limit (amount of candles fetched).
         # Calculate the amount of candles in the 1 min timeframe,
         # so that we get enough candles.
-        # BUG Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300).
-        #     We should throw a warning and make sure that `limit` is below their
-        #     supported maximum.
+        # Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300).
+        # `ccxt` throws an error if we exceed this limit.
         limit = math.ceil((stop - start) / (1000 * 60)) + 2
 
         candles = exchange.fetch_ohlcv(symbol, "1m", since, limit)

From d28babdaba548a7d22f6cb335e58e6beeead185e Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 12:48:03 +0200
Subject: [PATCH 28/53] REFACTOR PriceData.`preload_price_data_path` - Rename
 to `preload_prices` - ADD `get_missing_price_operations` using one query to
 find all missing prices - ADD TODO to use bulk insert

---
 src/price_data.py | 133 +++++++++++++++++++++++++++++++++++-----------
 src/taxman.py     |   6 ++-
 2 files changed, 107 insertions(+), 32 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 60873df6..035c6baa 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -408,6 +408,70 @@ def get_price(
         self.__set_price_db(db_path, tablename, utc_time, price)
         return price
 
+    def get_missing_price_operations(
+        self,
+        operations: list[transaction.Operation],
+        coin: str,
+        platform: str,
+        reference_coin: str = config.FIAT,
+    ) -> list[transaction.Operation]:
+        """Return operations for which no price was found in the database.
+
+        Requires the `operations` to have the same `coin` and `platform`.
+
+        Args:
+            operations (list[transaction.Operation])
+            coin (str)
+            platform (str)
+            reference_coin (str): Defaults to `config.FIAT`.
+
+        Returns:
+            list[transaction.Operation]
+        """
+        assert all(op.coin == coin for op in operations)
+        assert all(op.platform == platform for op in operations)
+
+        # We do not have to calculate the price, if there are no operations or the
+        # coin is the same as the reference coin.
+        if not operations or coin == reference_coin:
+            return []
+
+        db_path = self.get_db_path(platform)
+        # If the price database does not exist, we need to query all prices.
+        if not db_path.is_file():
+            return operations
+
+        tablename = self.get_tablename(coin, reference_coin)
+        utc_time_values = ",".join(f"('{op.utc_time}')" for op in operations)
+
+        with sqlite3.connect(db_path) as conn:
+            cur = conn.cursor()
+            # The query returns a list with 0 and 1's.
+            # - 0: a price exists.
+            # - 1: the price is missing.
+            query = (
+                "SELECT t.utc_time IS NULL "
+                f"FROM (VALUES {utc_time_values}) "
+                f"LEFT JOIN `{tablename}` t ON t.utc_time = COLUMN1;"
+            )
+
+            # Execute the query.
+            try:
+                cur.execute(query)
+            except sqlite3.OperationalError as e:
+                if str(e) == f"no such table: {tablename}":
+                    # The corresponding price table does not exist yet.
+                    # We need to query all prices.
+                    return operations
+                raise e
+
+            # Evaluate the result.
+            result = (bool(is_missing) for is_missing, in cur.fetchall())
+            missing_prices_operations = [
+                op for op, is_missing in zip(operations, result) if is_missing
+            ]
+            return missing_prices_operations
+
     def get_cost(
         self,
         tr: Union[transaction.Operation, transaction.SoldCoin],
@@ -569,37 +633,44 @@ def merge_prices(a: list, b: list = []) -> list:
 
         return datacomb
 
-    def preload_price_data_path(
-        self, operations: list, coin: str, exchange: str = ""
+    def preload_prices(
+        self,
+        operations: list[transaction.Operation],
+        coin: str,
+        platform: str,
+        reference_coin: str = config.FIAT,
     ) -> None:
+        """Preload price data.
 
-        reference_coin = config.FIAT
-        # get pairs used for calculating the price
-        operations_filtered = []
+        Requires the operations to have the same `coin` and `exchange`.
 
-        tablename = self.get_tablename(coin, reference_coin)
-        operations_filtered = [
-            op
-            for op in operations
-            if not self.__get_price_db(
-                self.get_db_path(op.platform), tablename, op.utc_time
-            )
-        ]
-        operations_grouped: dict = {}
-        if operations_filtered:
-            for i in operations_filtered:
-                if i.coin == config.FIAT:
-                    pass
-                elif operations_grouped.get(i.platform):
-                    operations_grouped[i.platform].append(i)
-                else:
-                    operations_grouped[i.platform] = [i]
-            for platf in operations_grouped.keys():
-                data = self._get_bulk_pair_data_path(
-                    operations_grouped[platf],
-                    coin,
-                    reference_coin,
-                    preferredexchange=platf,
-                )
-                for p in data:
-                    self.set_price_db(platf, coin, reference_coin, p[0], p[1])
+        Args:
+            operations (list[transaction.Operation])
+            coin (str)
+            platform (str)
+            reference_coin (str): Defaults to `config.FIAT`.
+        """
+        assert all(op.coin == coin for op in operations)
+        assert all(op.platform == platform for op in operations)
+
+        # We do not have to preload prices, if there are no operations or the coin is
+        # the same as the reference coin.
+        if not operations or coin == reference_coin:
+            return
+
+        # Only consider the operations for which we have no prices in the database.
+        missing_prices_operations = self.get_missing_price_operations(
+            operations, coin, platform, reference_coin
+        )
+
+        # Preload the prices.
+        data = self._get_bulk_pair_data_path(
+            missing_prices_operations,
+            coin,
+            reference_coin,
+            preferredexchange=platform,
+        )
+
+        # TODO Use bulk insert to write all prices at once into the database.
+        for p in data:
+            self.set_price_db(platform, coin, reference_coin, p[0], p[1])
diff --git a/src/taxman.py b/src/taxman.py
index 1df6a84e..8531ddce 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -178,7 +178,11 @@ def evaluate_taxation(self) -> None:
         log.debug("Starting evaluation...")
         for coin, operations in misc.group_by(self.book.operations, "coin").items():
             operations = sorted(operations, key=lambda op: op.utc_time)
-            self.price_data.preload_price_data_path(operations, coin)
+
+            # Preload prices per exchange.
+            for platform, _operations in misc.group_by(operations, "platform").items():
+                self.price_data.preload_prices(_operations, coin, platform)
+
             self.__evaluate_taxation(coin, operations)
 
     def print_evaluation(self) -> None:

From 2d31fc14969c22ee5fbb549b5cd5713f08b8e923 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 13:06:51 +0200
Subject: [PATCH 29/53] FIX `force_decimal` should raise ValueError instead of
 KeyError

---
 src/misc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/misc.py b/src/misc.py
index a8e6dbc3..c938b43a 100644
--- a/src/misc.py
+++ b/src/misc.py
@@ -75,7 +75,7 @@ def force_decimal(x: Union[str, int, float]) -> decimal.Decimal:
         x (Union[None, str, int, float])
 
     Raises:
-        KeyError: The given argument can not be parsed accordingly.
+        ValueError: The given argument can not be parsed accordingly.
 
     Returns:
         decimal.Decimal
@@ -84,7 +84,7 @@ def force_decimal(x: Union[str, int, float]) -> decimal.Decimal:
     if isinstance(d, decimal.Decimal):
         return d
     else:
-        raise KeyError(f"Could not parse `{d}` to decimal")
+        raise ValueError(f"Could not parse `{d}` to decimal")
 
 
 def reciprocal(d: decimal.Decimal) -> decimal.Decimal:

From 6acd035b0c2a9e8f96c18e19b6f3e8564897b0d3 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 13:20:17 +0200
Subject: [PATCH 30/53] ADD `get_avg_candle_prices`

---
 src/price_data.py | 51 +++++++++++++++++++++++++++++++++++++----------
 1 file changed, 41 insertions(+), 10 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 035c6baa..e11be918 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -522,6 +522,44 @@ def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> l
         assert isinstance(candles, list)
         return candles
 
+    def get_avg_candle_prices(
+        self, start: int, stop: int, symbol: str, exchange_id: str, invert: bool = False
+    ) -> list[tuple[int, decimal.Decimal]]:
+        """Return average price from ohlcv candles.
+
+        The average price of the candle is calculated as the avergae from the
+        open and close price.
+
+        Further information about candle-function can be found in `get_candles`.
+
+        Args:
+            start (int)
+            stop (int)
+            symbol (str)
+            exchange_id (str)
+            invert (bool, optional): Defaults to False.
+
+        Returns:
+            list: Timestamp and average prices of candles containing:
+
+                timestamp (int): Timestamp of candle in milliseconds since epoch.
+                avg_price (decimal.Decimal): Average price of candle.
+        """
+        avg_candle_prices = []
+        candle_prices = self.get_candles(start, stop, symbol, exchange_id)
+
+        for timestamp_ms, _open, _high, _low, _close, _volume in candle_prices:
+            open = misc.force_decimal(_open)
+            close = misc.force_decimal(_close)
+
+            avg_price = (open + close) / 2
+
+            if invert and avg_price != 0:
+                avg_price = 1 / avg_price
+
+            avg_candle_prices.append((timestamp_ms, avg_price))
+        return avg_candle_prices
+
     def _get_bulk_pair_data_path(
         self,
         operations: list,
@@ -584,17 +622,10 @@ def merge_prices(a: list, b: list = []) -> list:
                     symbol = p[1][i][1]["symbol"]
                     exchange = p[1][i][1]["exchange"]
                     invert = p[1][i][1]["inverted"]
-                    candles = self.get_candles(first, last, symbol, exchange)
-                    if invert:
-                        tempdata = list(
-                            map(lambda x: (x[0], 1 / ((x[1] + x[4]) / 2)), candles)
-                        )
-                    else:
-                        tempdata = list(
-                            map(lambda x: (x[0], (x[1] + x[4]) / 2), candles)
-                        )
 
-                    if tempdata:
+                    if tempdata := self.get_avg_candle_prices(
+                        first, last, symbol, exchange, invert
+                    ):
                         for operation in batch:
                             # TODO discuss which candle is picked
                             # current is closest to original date

From 6dcb6cebd65466fc2bed9704bf31b8ad005746c6 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 13:20:51 +0200
Subject: [PATCH 31/53] UPDATE `get_candles` docstring

---
 src/price_data.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index e11be918..e25e2688 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -485,17 +485,26 @@ def get_cost(
             return price * tr.sold
         raise NotImplementedError
 
-    def get_candles(self, start: int, stop: int, symbol: str, exchange_id: str) -> list:
+    def get_candles(
+        self, start: int, stop: int, symbol: str, exchange_id: str
+    ) -> list[tuple[int, float, float, float, float, float]]:
         """Return list with candles starting 2 minutes before start.
 
         Args:
             start (int): Start time in milliseconds since epoch.
-            stop (int): End time in milliseconds.
+            stop (int): End time in milliseconds since epoch.
             symbol (str)
             exchange_id (str)
 
         Returns:
-            list: List of OHLCV candles gathered from ccxt.
+            list: List of OHLCV candles gathered from ccxt containing:
+
+                timestamp (int): Timestamp of candle in milliseconds since epoch.
+                open_price (float)
+                lowest_price (float)
+                highest_price (float)
+                close_price (float)
+                volume (float)
         """
         assert stop >= start, f"`stop` must be after `start` {stop} !>= {start}."
 

From 591c7494ed0e5153df76496c016ded19d069b37e Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 13:53:30 +0200
Subject: [PATCH 32/53] FIX ignore missing import of `ccxt` module: `ccxt` does
 not provide type hints

---
 setup.cfg | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/setup.cfg b/setup.cfg
index 10a17170..3ae0206f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -9,6 +9,9 @@ warn_return_any = True
 show_error_codes = True
 warn_unused_configs = True
 
+[mypy-ccxt.*]
+ignore_missing_imports = True
+
 [flake8]
 exclude = *py*env*/
 max_line_length = 88

From 522644746d444a3ed919a76aa777ed9b0a369515 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sat, 10 Apr 2021 20:39:13 +0200
Subject: [PATCH 33/53] FIX mypy/flake8 errors and some refactoring graph

---
 src/graph.py      | 82 ++++++++++++++++++++++++++---------------------
 src/price_data.py | 12 +++----
 2 files changed, 50 insertions(+), 44 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 93874394..0e4cfdac 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,5 +1,7 @@
+import collections
 import logging
 import time
+from typing import Optional
 
 import ccxt
 
@@ -7,43 +9,59 @@
 
 
 class PricePath:
-    def __init__(self, exchanges: list = [], gdict: dict = {}, cache: dict = {}):
-        if not exchanges:
-            exchanges = ["binance", "coinbasepro"]
+    def __init__(
+        self,
+        exchanges: Optional[list[str]] = None,
+        gdict: Optional[dict] = None,
+        cache: Optional[dict] = None,
+    ):
+        if exchanges is None:
+            exchanges = []
+        if gdict is None:
+            gdict = {}
+        if cache is None:
+            cache = {}
+
         self.gdict = gdict
         self.cache = cache
-        self.priority: dict[str, int] = {}
-        # saves the priority for a certain path so that bad paths can be skipped
-        allpairs = []
+
+        # Saves the priority for a certain path so that bad paths can be skipped.
+        self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
+        allpairs: list[tuple[str, str, str, str]] = []
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
             exchange = exchange_class()
-            markets = []
             markets = exchange.fetch_markets()
-            if exchange.has["fetchOHLCV"]:
+            assert isinstance(markets, list)
 
+            if exchange.has["fetchOHLCV"]:
                 allpairs.extend(
                     [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets]
                 )
             else:
                 logging.warning(
-                    f"{exchange.name} Does not support fetch ohlcv. ignoring exchange and {len(markets)} pairs."
+                    f"{exchange.name} does not support fetch ohlcv. "
+                    f"Ignoring exchange and {len(markets)} pairs."
                 )
-        allpairs = list(set(allpairs))  # fast an easy deduplication
+
+        # Remove duplicate pairs.
+        # TODO It might be faster to create it directly as set.
+        #      Is it even necessary to convert it to a list?
+        allpairs = list(set(allpairs))
         # print("Total Pairs to check:", len(allpairs))
+
+        # Sorting by `symbol` to have the same result on every run due to the set.
         allpairs.sort(key=lambda x: x[3])
-        # sorting by symbol for pair to have the same result on every run due to the set
-        for i in allpairs:
-            base = i[0]
-            quote = i[1]
+
+        for base, quote, exchange, symbol in allpairs:
             self.addVertex(base)
             self.addVertex(quote)
             self.addEdge(
-                base, quote, {"exchange": i[2], "symbol": i[3], "inverted": False}
+                base, quote, {"exchange": exchange, "symbol": symbol, "inverted": False}
             )
             self.addEdge(
-                quote, base, {"exchange": i[2], "symbol": i[3], "inverted": True}
+                quote, base, {"exchange": exchange, "symbol": symbol, "inverted": True}
             )
 
     def edges(self):
@@ -81,11 +99,7 @@ def _getpath(self, start, stop, maxdepth, depth=0):
         if (edges := self.gdict.get(start)) and maxdepth > depth:
             for edge in edges:
                 if depth == 0 and edge[0] == stop:
-                    paths.append(
-                        [
-                            edge,
-                        ]
-                    )
+                    paths.append([edge])
                 elif edge[0] == stop:
                     paths.append(edge)
                 else:
@@ -93,26 +107,22 @@ def _getpath(self, start, stop, maxdepth, depth=0):
                     if len(path) and path is not None:
                         for p in path:
                             if p[0] == stop:
-                                newpath = [
-                                    edge,
-                                ]
+                                newpath = [edge]
                                 newpath.append(p)
                                 paths.append(newpath)
         return paths
 
     def change_prio(self, key, value):
         ke = "-".join(key)
-        if self.priority.get(ke):
-            self.priority[ke] += value
-        else:
-            self.priority[ke] = value
+        self.priority[ke] += value
 
     def getpath(
         self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3
     ):
         def comb_sort_key(path):
             """
-            Sorting funtction which is used to prioritze paths by (in order of magnitude):
+            Sorting function which is used to prioritize paths by:
+            (in order of magnitude)
             - smallest length -> +1 per element
             - preferred exchange -> +1 per exchange which is not preferred
             - priority -> +0.5 per unfinished execution of path
@@ -123,13 +133,11 @@ def comb_sort_key(path):
                 # prioritze pairs with the preferred exchange
                 volume = 1
                 volumenew = 0
-                if not (
-                    priority := self.priority.get(
-                        "-".join([a[1]["symbol"] for a in path])
-                    )
-                ):
-                    priority = 0
-                for c in [a if (a := check_cache(pair)) else None for pair in path]:
+                priority = self.priority.get(
+                    "-".join([a[1]["symbol"] for a in path]), 0
+                )
+                xl = (a if (a := check_cache(pair)) else None for pair in path)
+                for c in xl:
                     if c and c[0]:
                         if c[1][1]["stoptime"] == 0:
                             break
@@ -267,7 +275,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
 
 
 if __name__ == "__main__":
-    g = PricePath()
+    g = PricePath(exchanges=["binance", "coinbasepro"])
     start = "IOTA"
     to = "EUR"
     preferredexchange = "binance"
diff --git a/src/price_data.py b/src/price_data.py
index e25e2688..f7b55f47 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -576,17 +576,15 @@ def _get_bulk_pair_data_path(
         reference_coin: str,
         preferredexchange: str = "binance",
     ) -> list:
-        def merge_prices(a: list, b: list = []) -> list:
-            prices = []
+        def merge_prices(a: list, b: Optional[list] = None) -> list:
             if not b:
                 return a
+
+            prices = []
             for i in a:
-                factor = None
-                for j in b:
-                    if i[0] == j[0]:
-                        factor = j[1]
-                        break
+                factor = next(j[1] for j in b if i[0] == j[0])
                 prices.append((i[0], i[1] * factor))
+
             return prices
 
         timestamps: list = []

From 35536ae35f43b2af50a37164165f68c760078c9f Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sun, 11 Apr 2021 14:36:32 +0200
Subject: [PATCH 34/53] ADD make venv and some comments in makefile

---
 Makefile | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index a2927f0c..72bfa664 100644
--- a/Makefile
+++ b/Makefile
@@ -9,6 +9,7 @@ mypy:
 check-isort:
 	isort . --check-only --diff
 
+# Run linter
 lint: flake8 mypy check-isort
 
 isort:
@@ -17,13 +18,21 @@ isort:
 black:
 	black src
 
+# Run formatter
 format: black isort
 
+# Run the project
+run:
+	python src/main.py
+
+# Install requirements
 install:
 	python -m pip install --upgrade pip
 	pip install -r requirements.txt -r requirements-dev.txt
 
-run:
-	python src/main.py
+# Setup virtuel environment
+venv:
+	python -m venv .pyenv
+	.pyenv\Scripts\activate && make install	
 
-.PHONY: flake8 mypy check-isort lint isort black format install run
+.PHONY: flake8 mypy check-isort lint isort black format run install venv

From 2e9b866a5d30966d26e65a11b858544b3e95e9bd Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sun, 11 Apr 2021 17:06:31 +0200
Subject: [PATCH 35/53] REFACTOR Getting time batches from operations... in
 _get_bulk_pair_data_path - ADD transaction.time_batches

---
 src/price_data.py  | 28 +++++++---------------
 src/transaction.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 20 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index f7b55f47..4452e1a4 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -587,28 +587,16 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
 
             return prices
 
-        timestamps: list = []
-        timestamppairs: list = []
-        maxminutes = (
-            300  # coinbasepro only allows a max of 300 minutes need a better solution
+        # TODO Set `max_difference` to the platform specific ohlcv-limit.
+        max_difference = 300  # coinbasepro
+        # TODO Set `max_size` to the platform specific ohlcv-limit.
+        max_size = 300  # coinbasepro
+        time_batches = transaction.time_batches(
+            operations, max_difference=max_difference, max_size=max_size
         )
-        timestamps = [op.utc_time for op in operations]
-        if not preferredexchange:
-            preferredexchange = "binance"
-
-        current_first = None
-        for timestamp in timestamps:
-            if (
-                current_first
-                and current_first + datetime.timedelta(minutes=maxminutes - 4)
-                > timestamp
-            ):
-                timestamppairs[-1].append(timestamp)
-            else:
-                current_first = timestamp
-                timestamppairs.append([timestamp])
+
         datacomb = []
-        for batch in timestamppairs:
+        for batch in time_batches:
             # ccxt works with timestamps in milliseconds
             first = misc.to_ms_timestamp(batch[0])
             last = misc.to_ms_timestamp(batch[-1])
diff --git a/src/transaction.py b/src/transaction.py
index 7bdcce32..6eebdd85 100644
--- a/src/transaction.py
+++ b/src/transaction.py
@@ -130,3 +130,62 @@ class TaxEvent:
     taxed_gain: decimal.Decimal
     op: Operation
     remark: str = ""
+
+
+# Functions
+
+
+def time_batches(
+    operations: list[Operation],
+    max_difference: typing.Optional[int],
+    max_size: typing.Optional[int] = None,
+) -> typing.Iterable[list[datetime.datetime]]:
+    """Return timestamps of operations in batches.
+
+    The batches are clustered such that the batches time difference
+    from first to last operation is lesser than `max_difference` minutes and the
+    batches have a maximum size of `max_size`.
+
+    TODO Solve the clustering optimally. (It's already optimal, if max_size is None.)
+
+    Args:
+        operations (list[Operation]): List of operations.
+        max_difference (Optional[int], optional):
+            Maximal time difference in batch (in minutes).
+            Defaults to None (unlimited time difference).
+        limax_sizemit (Optional[int], optional):
+            Maximum size of batch.
+            Defaults to None (unlimited size).
+
+    Yields:
+        Generator[None, list[datetime.datetime], None]: Yield the timestamp clusters.
+    """
+    assert max_difference is None or max_difference >= 0
+    assert max_size is None or max_size > 0
+
+    batch: list[datetime.datetime] = []
+
+    if not operations:
+        # Nothing to cluster, return empty list.
+        return batch
+
+    # Calculate the latest time which is allowed to be in this cluster.
+    if max_difference:
+        max_time = operations[0].utc_time + datetime.timedelta(minutes=max_difference)
+    else:
+        max_time = datetime.datetime.max
+
+    for op in operations:
+        timestamp = op.utc_time
+
+        # Check if timestamp is before max_time and
+        # that our cluster isn't to large already.
+        if timestamp < max_time and (not max_size or len(batch) < max_size):
+            batch.append(timestamp)
+        else:
+            yield batch
+
+            batch = [timestamp]
+
+            if max_difference:
+                max_time = timestamp + datetime.timedelta(minutes=max_difference)

From 842f7319afc501096d39f8af55af6b3473a5b783 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sun, 11 Apr 2021 17:10:19 +0200
Subject: [PATCH 36/53] ADD TODO: preferredexchange default only for debug
 purposes

---
 src/price_data.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/price_data.py b/src/price_data.py
index 4452e1a4..1e0f1766 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -569,6 +569,8 @@ def get_avg_candle_prices(
             avg_candle_prices.append((timestamp_ms, avg_price))
         return avg_candle_prices
 
+    # TODO preferredexchange default is only for debug purposes and should be
+    #      removed later on.
     def _get_bulk_pair_data_path(
         self,
         operations: list,

From cac907c8fde52d269d5fe527b0cd0680637a6211 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Tue, 27 Apr 2021 19:36:45 +0200
Subject: [PATCH 37/53] refactored path sorting, renamed variables and
 functions to underscored variants, renamed some bad variables

---
 Makefile     |  2 +-
 src/graph.py | 72 ++++++++++++++++++++++++++--------------------------
 2 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/Makefile b/Makefile
index 72bfa664..aad7d66c 100644
--- a/Makefile
+++ b/Makefile
@@ -30,7 +30,7 @@ install:
 	python -m pip install --upgrade pip
 	pip install -r requirements.txt -r requirements-dev.txt
 
-# Setup virtuel environment
+# Setup virtual environment
 venv:
 	python -m venv .pyenv
 	.pyenv\Scripts\activate && make install	
diff --git a/src/graph.py b/src/graph.py
index 0e4cfdac..84c4e462 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -55,21 +55,21 @@ def __init__(
         allpairs.sort(key=lambda x: x[3])
 
         for base, quote, exchange, symbol in allpairs:
-            self.addVertex(base)
-            self.addVertex(quote)
-            self.addEdge(
+            self.add_Vertex(base)
+            self.add_Vertex(quote)
+            self.add_Edge(
                 base, quote, {"exchange": exchange, "symbol": symbol, "inverted": False}
             )
-            self.addEdge(
+            self.add_Edge(
                 quote, base, {"exchange": exchange, "symbol": symbol, "inverted": True}
             )
 
     def edges(self):
-        return self.findedges()
+        return self.find_edges()
 
     # Find the distinct list of edges
 
-    def findedges(self):
+    def find_edges(self):
         edgename = []
         for vrtx in self.gdict:
             for nxtvrtx in self.gdict[vrtx]:
@@ -77,21 +77,21 @@ def findedges(self):
                     edgename.append({vrtx, nxtvrtx})
         return edgename
 
-    def getVertices(self):
+    def get_Vertices(self):
         return list(self.gdict.keys())
 
     # Add the vertex as a key
-    def addVertex(self, vrtx):
+    def add_Vertex(self, vrtx):
         if vrtx not in self.gdict:
             self.gdict[vrtx] = []
 
-    def addEdge(self, vrtx1, vrtx2, data):
+    def add_Edge(self, vrtx1, vrtx2, data):
         if vrtx1 in self.gdict:
             self.gdict[vrtx1].append((vrtx2, data))
         else:
             self.gdict[vrtx1] = [vrtx2]
 
-    def _getpath(self, start, stop, maxdepth, depth=0):
+    def _get_path(self, start, stop, maxdepth, depth=0):
         """
         a recursive function for finding all possible paths between to edges
         """
@@ -103,7 +103,7 @@ def _getpath(self, start, stop, maxdepth, depth=0):
                 elif edge[0] == stop:
                     paths.append(edge)
                 else:
-                    path = self._getpath(edge[0], stop, maxdepth, depth=depth + 1)
+                    path = self._get_path(edge[0], stop, maxdepth, depth=depth + 1)
                     if len(path) and path is not None:
                         for p in path:
                             if p[0] == stop:
@@ -116,7 +116,7 @@ def change_prio(self, key, value):
         ke = "-".join(key)
         self.priority[ke] += value
 
-    def getpath(
+    def get_path(
         self, start, stop, starttime=0, stoptime=0, preferredexchange=None, maxdepth=3
     ):
         def comb_sort_key(path):
@@ -129,27 +129,28 @@ def comb_sort_key(path):
             - volume (if known) -> 1/sum(avg_vol per pair)
             - volume (if not known) -> 1 -> always smaller if volume is known
             """
-            if preferredexchange:
-                # prioritze pairs with the preferred exchange
-                volume = 1
-                volumenew = 0
-                priority = self.priority.get(
-                    "-".join([a[1]["symbol"] for a in path]), 0
-                )
-                xl = (a if (a := check_cache(pair)) else None for pair in path)
-                for c in xl:
-                    if c and c[0]:
-                        if c[1][1]["stoptime"] == 0:
-                            break
-                        elif c[1][1]["avg_vol"] != 0:
-                            # is very much off because volume is not in the same
-                            # currency something for later
-                            volumenew += c[1][1]["avg_vol"]
-
-                    else:
+            # prioritze pairs with the preferred exchange
+            volume = 1
+            volumenew = 0
+            priority = self.priority.get("-".join([a[1]["symbol"] for a in path]), 0)
+            pathlis = (a if (a := check_cache(pair)) else None for pair in path)
+            for possiblepath in pathlis:
+                if possiblepath and possiblepath[0]:
+                    if possiblepath[1][1]["stoptime"] == 0:
                         break
+                    elif possiblepath[1][1]["avg_vol"] != 0:
+                        # is very much off because volume is not in the same
+                        # currency something for later
+                        volumenew += possiblepath[1][1]["avg_vol"]
+
                 else:
-                    volume = 1 / volumenew
+                    break
+            else:
+                volume = 1 / volumenew
+            temppriority = volume + priority
+
+            if preferredexchange:
+
                 return (
                     len(path)
                     + sum(
@@ -158,11 +159,10 @@ def comb_sort_key(path):
                             for pair in path
                         ]
                     )
-                    + volume
-                    + priority
+                    + temppriority
                 )
             else:
-                return len(path)
+                return len(path) + temppriority
 
         def check_cache(pair):
             """
@@ -251,7 +251,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
             return (globalstarttime, globalstoptime), path
 
         # get all possible paths which are no longer than 4 pairs long
-        paths = self._getpath(start, stop, maxdepth)
+        paths = self._get_path(start, stop, maxdepth)
         # sort by path length to get minimal conversion chain to reduce error
         paths = sorted(paths, key=comb_sort_key)
         # get timeframe in which a path is viable
@@ -279,7 +279,7 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
     start = "IOTA"
     to = "EUR"
     preferredexchange = "binance"
-    path = g.getpath(start, to, maxdepth=2, preferredexchange=preferredexchange)
+    path = g.get_path(start, to, maxdepth=2, preferredexchange=preferredexchange)
     # debug only in actual use we would iterate over
     # the path object fetching new paths as needed
     path = list(path)

From dd8fb3e8258cbbb87a77e63c54792689ddc2cd5d Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Tue, 27 Apr 2021 22:12:02 +0200
Subject: [PATCH 38/53] better ratelimiting and exchanges are set via config

---
 src/config.py     |  1 +
 src/graph.py      | 34 ++++++++++++++++++++++++++--------
 src/price_data.py |  4 ++--
 3 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/src/config.py b/src/config.py
index bba8ced3..6ef53f08 100644
--- a/src/config.py
+++ b/src/config.py
@@ -43,3 +43,4 @@ def IS_LONG_TERM(buy: datetime, sell: datetime) -> bool:
 DATA_PATH = Path(BASE_PATH, "data")
 EXPORT_PATH = Path(BASE_PATH, "export")
 FIAT = FIAT_CLASS.name  # Convert to string.
+EXCHANGES = ["binance", "coinbasepro"]
diff --git a/src/graph.py b/src/graph.py
index 84c4e462..e5e513db 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,6 +1,7 @@
 import collections
 import logging
 import time
+import config
 from typing import Optional
 
 import ccxt
@@ -8,6 +9,21 @@
 log = logging.getLogger(__name__)
 
 
+class RateLimit:
+    exchangedict = {}
+
+    def limit(self, exchange):
+        if lastcall := self.exchangedict.get(exchange.id):
+            now = time.time()
+            delay = exchange.rateLimit / 1000
+            timepassed = now - lastcall
+            if (waitfor := delay - timepassed) > 0:
+                time.sleep(waitfor)
+            self.exchangedict[exchange.id] = time.time()
+        else:
+            self.exchangedict[exchange.id] = time.time()
+
+
 class PricePath:
     def __init__(
         self,
@@ -16,7 +32,7 @@ def __init__(
         cache: Optional[dict] = None,
     ):
         if exchanges is None:
-            exchanges = []
+            exchanges = list(config.EXCHANGES)
         if gdict is None:
             gdict = {}
         if cache is None:
@@ -24,10 +40,11 @@ def __init__(
 
         self.gdict = gdict
         self.cache = cache
+        self.RateLimit = RateLimit()
 
         # Saves the priority for a certain path so that bad paths can be skipped.
         self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
-        allpairs: list[tuple[str, str, str, str]] = []
+        allpairs: list(tuple[str, str, str, str]) = []
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -89,15 +106,17 @@ def add_Edge(self, vrtx1, vrtx2, data):
         if vrtx1 in self.gdict:
             self.gdict[vrtx1].append((vrtx2, data))
         else:
-            self.gdict[vrtx1] = [vrtx2]
+            self.gdict[vrtx1] = [
+                (vrtx2, data),
+            ]
 
     def _get_path(self, start, stop, maxdepth, depth=0):
         """
-        a recursive function for finding all possible paths between to edges
+        a recursive function for finding all possible paths between to vertices
         """
         paths = []
         if (edges := self.gdict.get(start)) and maxdepth > depth:
-            for edge in edges:
+            for edge in edges:  # list of edges starting from the start vertice
                 if depth == 0 and edge[0] == stop:
                     paths.append([edge])
                 elif edge[0] == stop:
@@ -203,9 +222,8 @@ def get_active_timeframe(path, starttimestamp=0, stoptimestamp=-1):
                 if not cached:
                     exchange_class = getattr(ccxt, path[i][1]["exchange"])
                     exchange = exchange_class()
-                    # TODO maybe a more elaborate ratelimit wich removes execution
-                    # time to from the ratelimit
-                    time.sleep(exchange.rateLimit / 1000)
+
+                    self.RateLimit.limit(exchange)
                     timeframeexchange = exchange.timeframes.get("1w")
                     if (
                         timeframeexchange
diff --git a/src/price_data.py b/src/price_data.py
index 1e0f1766..5a54ef16 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -516,7 +516,7 @@ def get_candles(
         assert exchange.has["fetchOHLCV"]
 
         # time.sleep wants seconds
-        time.sleep(exchange.rateLimit / 1000)
+        self.path.RateLimit.limit(exchange)
 
         # Get candles 2 min before and after start/stop.
         since = start - 2 * 60 * 1000
@@ -607,7 +607,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
             log.info(
                 f"getting data from {str(firststr)} to {str(laststr)} for {str(coin)}"
             )
-            path = self.path.getpath(
+            path = self.path.get_path(
                 coin, reference_coin, first, last, preferredexchange=preferredexchange
             )
             for p in path:

From 98c049f0d8dfc2a8470e3163403cc2bf0242a87b Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Sun, 2 May 2021 10:28:10 +0200
Subject: [PATCH 39/53] change from list to set

---
 src/graph.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index e5e513db..400337d6 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -44,7 +44,7 @@ def __init__(
 
         # Saves the priority for a certain path so that bad paths can be skipped.
         self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
-        allpairs: list(tuple[str, str, str, str]) = []
+        allpairs: set(tuple[str, str, str, str]) = set()
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -53,9 +53,11 @@ def __init__(
             assert isinstance(markets, list)
 
             if exchange.has["fetchOHLCV"]:
-                allpairs.extend(
-                    [(i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets]
-                )
+                toadd = [
+                    (i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets
+                ]
+                for pair in toadd:
+                    allpairs.add(pair)
             else:
                 logging.warning(
                     f"{exchange.name} does not support fetch ohlcv. "
@@ -65,7 +67,8 @@ def __init__(
         # Remove duplicate pairs.
         # TODO It might be faster to create it directly as set.
         #      Is it even necessary to convert it to a list?
-        allpairs = list(set(allpairs))
+        # allpairs = list(set(allpairs))
+        allpairs = list(allpairs)
         # print("Total Pairs to check:", len(allpairs))
 
         # Sorting by `symbol` to have the same result on every run due to the set.

From 0808511554ea478c4f76104b064747e91c7df602 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 26 May 2021 17:00:54 +0200
Subject: [PATCH 40/53] fixed a bug which caused misses when looking up
 price_data

---
 src/price_data.py  | 24 +++++++++++++-----------
 src/transaction.py |  1 +
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/src/price_data.py b/src/price_data.py
index 5a54ef16..a7dcc2a4 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -536,7 +536,7 @@ def get_avg_candle_prices(
     ) -> list[tuple[int, decimal.Decimal]]:
         """Return average price from ohlcv candles.
 
-        The average price of the candle is calculated as the avergae from the
+        The average price of the candle is calculated as the average from the
         open and close price.
 
         Further information about candle-function can be found in `get_candles`.
@@ -598,6 +598,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
         )
 
         datacomb = []
+
         for batch in time_batches:
             # ccxt works with timestamps in milliseconds
             first = misc.to_ms_timestamp(batch[0])
@@ -612,7 +613,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
             )
             for p in path:
                 tempdatalis: list = []
-                printstr = [a[1]["symbol"] for a in p[1]]
+                printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]]
                 log.debug(f"found path over {' -> '.join(printstr)}")
                 for i in range(len(p[1])):
                     tempdatalis.append([])
@@ -692,13 +693,14 @@ def preload_prices(
         )
 
         # Preload the prices.
-        data = self._get_bulk_pair_data_path(
-            missing_prices_operations,
-            coin,
-            reference_coin,
-            preferredexchange=platform,
-        )
+        if missing_prices_operations:
+            data = self._get_bulk_pair_data_path(
+                missing_prices_operations,
+                coin,
+                reference_coin,
+                preferredexchange=platform,
+            )
 
-        # TODO Use bulk insert to write all prices at once into the database.
-        for p in data:
-            self.set_price_db(platform, coin, reference_coin, p[0], p[1])
+            # TODO Use bulk insert to write all prices at once into the database.
+            for p in data:
+                self.set_price_db(platform, coin, reference_coin, p[0], p[1])
diff --git a/src/transaction.py b/src/transaction.py
index 6eebdd85..91db0159 100644
--- a/src/transaction.py
+++ b/src/transaction.py
@@ -189,3 +189,4 @@ def time_batches(
 
             if max_difference:
                 max_time = timestamp + datetime.timedelta(minutes=max_difference)
+    yield batch  # fixes bug where last batch ist not yielded

From 6bda2145e528379b35edf28847079d1f16b2cc00 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Tue, 15 Jun 2021 13:38:46 +0200
Subject: [PATCH 41/53] fix ratelimit for kraken

---
 src/graph.py      |  9 ++++++---
 src/price_data.py | 10 +++++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 400337d6..ecbb28a2 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -16,9 +16,11 @@ def limit(self, exchange):
         if lastcall := self.exchangedict.get(exchange.id):
             now = time.time()
             delay = exchange.rateLimit / 1000
+            if exchange.name == "Kraken":
+                delay += 2  # the reported ratelimit gets exceeded sometimes
             timepassed = now - lastcall
             if (waitfor := delay - timepassed) > 0:
-                time.sleep(waitfor)
+                time.sleep(waitfor + 0.5)
             self.exchangedict[exchange.id] = time.time()
         else:
             self.exchangedict[exchange.id] = time.time()
@@ -153,7 +155,7 @@ def comb_sort_key(path):
             """
             # prioritze pairs with the preferred exchange
             volume = 1
-            volumenew = 0
+            volumenew = 1
             priority = self.priority.get("-".join([a[1]["symbol"] for a in path]), 0)
             pathlis = (a if (a := check_cache(pair)) else None for pair in path)
             for possiblepath in pathlis:
@@ -163,7 +165,8 @@ def comb_sort_key(path):
                     elif possiblepath[1][1]["avg_vol"] != 0:
                         # is very much off because volume is not in the same
                         # currency something for later
-                        volumenew += possiblepath[1][1]["avg_vol"]
+                        # volumenew*= volume of next thing in path (needs to be fixed for inverted paths)
+                        volumenew *= possiblepath[1][1]["avg_vol"]
 
                 else:
                     break
diff --git a/src/price_data.py b/src/price_data.py
index a7dcc2a4..d89dfcba 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -526,8 +526,15 @@ def get_candles(
         # Most exchange have an upper limit (e.g. binance 1000, coinbasepro 300).
         # `ccxt` throws an error if we exceed this limit.
         limit = math.ceil((stop - start) / (1000 * 60)) + 2
+        try:
+            candles = exchange.fetch_ohlcv(symbol, "1m", since, limit)
+        except ccxt.RateLimitExceeded:
+            # sometimes the ratelimit gets exceeded for kraken dunno why
+            logging.warning("Ratelimit exceeded sleeping 10 seconds and retrying")
+            time.sleep(10)
+            self.path.RateLimit.limit(exchange)
+            candles = exchange.fetch_ohlcv(symbol, "1m", since, limit)
 
-        candles = exchange.fetch_ohlcv(symbol, "1m", since, limit)
         assert isinstance(candles, list)
         return candles
 
@@ -611,6 +618,7 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
             path = self.path.get_path(
                 coin, reference_coin, first, last, preferredexchange=preferredexchange
             )
+            # Todo Move the path calculation out of the for loop and only filter after time
             for p in path:
                 tempdatalis: list = []
                 printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]]

From 908043e706d79c2d43b2420da707f7f150830a1e Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Sun, 15 Aug 2021 09:42:36 +0200
Subject: [PATCH 42/53] Update requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 22ad6820..011271fd 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ aiodns==2.0.0
 aiohttp==3.7.4.post0
 async-timeout==3.0.1
 attrs==20.3.0
-ccxt==1.42.47
+ccxt==1.42.7
 certifi==2020.12.5
 cffi==1.14.5
 chardet==4.0.0

From f0a52ca39c92aebb08280b703dd200fdfda1ded5 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 8 Sep 2021 10:33:38 +0200
Subject: [PATCH 43/53] fix formatting

---
 src/graph.py       | 12 +++++++-----
 src/price_data.py  |  3 ++-
 src/transaction.py |  3 +--
 3 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index ecbb28a2..b30ad209 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,16 +1,17 @@
 import collections
 import logging
 import time
-import config
-from typing import Optional
+from typing import Dict, Optional
 
 import ccxt
 
+import config
+
 log = logging.getLogger(__name__)
 
 
 class RateLimit:
-    exchangedict = {}
+    exchangedict: Dict[str, int] = {}
 
     def limit(self, exchange):
         if lastcall := self.exchangedict.get(exchange.id):
@@ -46,7 +47,7 @@ def __init__(
 
         # Saves the priority for a certain path so that bad paths can be skipped.
         self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
-        allpairs: set(tuple[str, str, str, str]) = set()
+        allpairs: set[tuple[str, str, str, str]] = set()
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -165,7 +166,8 @@ def comb_sort_key(path):
                     elif possiblepath[1][1]["avg_vol"] != 0:
                         # is very much off because volume is not in the same
                         # currency something for later
-                        # volumenew*= volume of next thing in path (needs to be fixed for inverted paths)
+                        # volumenew*= volume of next thing in path
+                        # (needs to be fixed for inverted paths)
                         volumenew *= possiblepath[1][1]["avg_vol"]
 
                 else:
diff --git a/src/price_data.py b/src/price_data.py
index 1f187054..dc2edac4 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -767,7 +767,8 @@ def merge_prices(a: list, b: Optional[list] = None) -> list:
             path = self.path.get_path(
                 coin, reference_coin, first, last, preferredexchange=preferredexchange
             )
-            # Todo Move the path calculation out of the for loop and only filter after time
+            # Todo Move the path calculation out of the for loop
+            # and only filter after time
             for p in path:
                 tempdatalis: list = []
                 printstr = [f"{a[1]['symbol']} ({a[1]['exchange']})" for a in p[1]]
diff --git a/src/transaction.py b/src/transaction.py
index a3f4f52c..46c9af38 100644
--- a/src/transaction.py
+++ b/src/transaction.py
@@ -140,7 +140,6 @@ class TaxEvent:
     remark: str = ""
 
 
-
 # Functions
 
 
@@ -200,6 +199,7 @@ def time_batches(
                 max_time = timestamp + datetime.timedelta(minutes=max_difference)
     yield batch  # fixes bug where last batch ist not yielded
 
+
 gain_operations = [
     CoinLendEnd,
     StakingEnd,
@@ -246,4 +246,3 @@ def key(op: Operation) -> tuple:
         return tuple([idx] + [getattr(op, key) for key in keys] if keys else [])
 
     return sorted(operations, key=key)
-

From c56ba999542bf672dd6d3b6604c6b5ac62dc698d Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Wed, 8 Sep 2021 10:39:04 +0200
Subject: [PATCH 44/53] fix formatting

---
 src/graph.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index b30ad209..1f0717d6 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,7 +1,7 @@
 import collections
 import logging
 import time
-from typing import Dict, Optional
+from typing import Dict, List, Optional, Tuple
 
 import ccxt
 
@@ -47,7 +47,7 @@ def __init__(
 
         # Saves the priority for a certain path so that bad paths can be skipped.
         self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
-        allpairs: set[tuple[str, str, str, str]] = set()
+        allpairs: set[Tuple[str, str, str, str]] = set()
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -71,13 +71,14 @@ def __init__(
         # TODO It might be faster to create it directly as set.
         #      Is it even necessary to convert it to a list?
         # allpairs = list(set(allpairs))
-        allpairs = list(allpairs)
+        allpairslist: List[Tuple[str, str, str, str]] = list(allpairs)
+        del allpairs
         # print("Total Pairs to check:", len(allpairs))
 
         # Sorting by `symbol` to have the same result on every run due to the set.
-        allpairs.sort(key=lambda x: x[3])
+        allpairslist.sort(key=lambda x: x[3])
 
-        for base, quote, exchange, symbol in allpairs:
+        for base, quote, exchange, symbol in allpairslist:
             self.add_Vertex(base)
             self.add_Vertex(quote)
             self.add_Edge(

From ada48598c37a859387d901c93037495f336a1c7d Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sun, 28 Nov 2021 09:45:30 +0100
Subject: [PATCH 45/53] UPDATE Use types for type hinting

---
 src/graph.py | 8 ++++----
 src/misc.py  | 5 ++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 1f0717d6..155af825 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -1,7 +1,7 @@
 import collections
 import logging
 import time
-from typing import Dict, List, Optional, Tuple
+from typing import Optional
 
 import ccxt
 
@@ -11,7 +11,7 @@
 
 
 class RateLimit:
-    exchangedict: Dict[str, int] = {}
+    exchangedict: dict[str, int] = {}
 
     def limit(self, exchange):
         if lastcall := self.exchangedict.get(exchange.id):
@@ -47,7 +47,7 @@ def __init__(
 
         # Saves the priority for a certain path so that bad paths can be skipped.
         self.priority: collections.defaultdict[str, int] = collections.defaultdict(int)
-        allpairs: set[Tuple[str, str, str, str]] = set()
+        allpairs: set[tuple[str, str, str, str]] = set()
 
         for exchange_id in exchanges:
             exchange_class = getattr(ccxt, exchange_id)
@@ -71,7 +71,7 @@ def __init__(
         # TODO It might be faster to create it directly as set.
         #      Is it even necessary to convert it to a list?
         # allpairs = list(set(allpairs))
-        allpairslist: List[Tuple[str, str, str, str]] = list(allpairs)
+        allpairslist: list[tuple[str, str, str, str]] = list(allpairs)
         del allpairs
         # print("Total Pairs to check:", len(allpairs))
 
diff --git a/src/misc.py b/src/misc.py
index cb4846ca..f8ca23f2 100644
--- a/src/misc.py
+++ b/src/misc.py
@@ -28,7 +28,6 @@
     Optional,
     SupportsFloat,
     SupportsInt,
-    Tuple,
     TypeVar,
     Union,
     cast,
@@ -122,7 +121,7 @@ def to_decimal_timestamp(d: datetime.datetime) -> decimal.Decimal:
 def get_offset_timestamps(
     utc_time: datetime.datetime,
     offset: datetime.timedelta,
-) -> Tuple[int, int]:
+) -> tuple[int, int]:
     """Return timestamps in milliseconds `offset/2` before/after `utc_time`.
 
     Args:
@@ -130,7 +129,7 @@ def get_offset_timestamps(
         offset (datetime.timedelta)
 
     Returns:
-        Tuple[int, int]: Timestamps in milliseconds.
+        tuple[int, int]: Timestamps in milliseconds.
     """
     start = utc_time - offset / 2
     end = utc_time + offset / 2

From ee6fdcf573259de04051a0dd596aca90a7e6be3b Mon Sep 17 00:00:00 2001
From: Griffsano <18743559+Griffsano@users.noreply.github.com>
Date: Sun, 26 Dec 2021 11:58:30 +0100
Subject: [PATCH 46/53] warning if exchange for CSV export is not found in
 ccxt list

---
 src/book.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/src/book.py b/src/book.py
index b3ce0536..a0409826 100644
--- a/src/book.py
+++ b/src/book.py
@@ -856,6 +856,24 @@ def read_file(self, file_path: Path) -> None:
 
             log.info("Reading file from exchange %s at %s", exchange, file_path)
             read_file(file_path)
+
+            cctx_mapping = {
+                "binance": "binance",
+                "binance_v2": "binance",
+                "coinbase": "coinbasepro",
+                "coinbase_pro": "coinbasepro",
+                "kraken_ledgers_old": "kraken",
+                "kraken_ledgers": "kraken",
+                "kraken_trades": "kraken",
+                "bitpanda_pro_trades": "bitpanda",
+            }
+            api = cctx_mapping.get(exchange)
+
+            if api not in config.EXCHANGES:
+                log.warning(
+                    f"Exchange `{api}` not found in EXCHANGES API list in config.py. "
+                    "Consider adding it to obtain more accurate price data."
+                )
         else:
             log.warning(
                 f"Unable to detect the exchange of file `{file_path}`. "

From bbd8bb81b9c2219d0c4c96e43d32ef3fc2312772 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Sun, 26 Dec 2021 13:46:11 +0100
Subject: [PATCH 47/53] formatting

---
 src/taxman.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/taxman.py b/src/taxman.py
index 25dfa039..f1ce7809 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -251,23 +251,23 @@ def _evaluate_taxation_per_coin(
     def evaluate_taxation(self) -> None:
         """Evaluate the taxation using country specific function."""
         log.debug("Starting evaluation...")
-        
+
         if config.MULTI_DEPOT:
             # Evaluate taxation separated by platforms and coins.
-            for _, operations in misc.group_by(
+            for platform, operations in misc.group_by(
                 self.book.operations, "platform"
             ).items():
                 for coin, _operations in misc.group_by(operations, "coin").items():
-                    self.price_data.preload_prices(_operations, coin, platform)              
+                    self.price_data.preload_prices(_operations, coin, platform)
                 self._evaluate_taxation_per_coin(operations)
         else:
-            
-            for platform, _operations in misc.group_by(operations, "platform").items():
-                for coin, coin_operations in misc.group_by(_operations, "coin").items():
-                    self.price_data.preload_prices(coin_operations, coin, platform)
+
+            for plat, _ops in misc.group_by(self.book.operations, "platform").items():
+                for coin, coin_operations in misc.group_by(_ops, "coin").items():
+                    self.price_data.preload_prices(coin_operations, coin, plat)
             # Evaluate taxation separated by coins in a single virtual depot.
             self._evaluate_taxation_per_coin(self.book.operations)
-            
+
     def print_evaluation(self) -> None:
         """Print short summary of evaluation to stdout."""
         # Summarize the tax evaluation.

From 8f24604b973354030e4450185acd364d94b7d0c6 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 31 Dec 2021 16:04:29 +0100
Subject: [PATCH 48/53] added progress counter and sorted operations before
 fetching prices

---
 src/taxman.py | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/src/taxman.py b/src/taxman.py
index f1ce7809..1c2a774c 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -251,20 +251,26 @@ def _evaluate_taxation_per_coin(
     def evaluate_taxation(self) -> None:
         """Evaluate the taxation using country specific function."""
         log.debug("Starting evaluation...")
+        counter = 0
+        total_operations = len(self.book.operations)
+        for plat, _ops in misc.group_by(self.book.operations, "platform").items():
+            for coin, coin_operations in misc.group_by(_ops, "coin").items():
+                s_operations = transaction.sort_operations(
+                    coin_operations, ["utc_time"]
+                )
+                self.price_data.preload_prices(s_operations, coin, plat)
+                counter += len(coin_operations)
+                log.info(f"{counter} out of {total_operations} operations processed.")
+                log.info(f"{counter/total_operations*100}% done")
 
         if config.MULTI_DEPOT:
             # Evaluate taxation separated by platforms and coins.
             for platform, operations in misc.group_by(
                 self.book.operations, "platform"
             ).items():
-                for coin, _operations in misc.group_by(operations, "coin").items():
-                    self.price_data.preload_prices(_operations, coin, platform)
+
                 self._evaluate_taxation_per_coin(operations)
         else:
-
-            for plat, _ops in misc.group_by(self.book.operations, "platform").items():
-                for coin, coin_operations in misc.group_by(_ops, "coin").items():
-                    self.price_data.preload_prices(coin_operations, coin, plat)
             # Evaluate taxation separated by coins in a single virtual depot.
             self._evaluate_taxation_per_coin(self.book.operations)
 

From 84f7e866bb718eb56ff28be74231a26b02f3245b Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 31 Dec 2021 17:32:21 +0100
Subject: [PATCH 49/53] kraken ignore and warning and formatting

---
 src/graph.py  | 7 ++++++-
 src/taxman.py | 2 +-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 155af825..6630396f 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -54,7 +54,12 @@ def __init__(
             exchange = exchange_class()
             markets = exchange.fetch_markets()
             assert isinstance(markets, list)
-
+            if exchange_id == "kraken":
+                log.warning(
+                    """Kraken is currently not supported due to only supporting 
+                        the last 720 candles of historic data"""
+                )
+                continue
             if exchange.has["fetchOHLCV"]:
                 toadd = [
                     (i["base"], i["quote"], exchange_id, i["symbol"]) for i in markets
diff --git a/src/taxman.py b/src/taxman.py
index 1c2a774c..bab37a6d 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -265,7 +265,7 @@ def evaluate_taxation(self) -> None:
 
         if config.MULTI_DEPOT:
             # Evaluate taxation separated by platforms and coins.
-            for platform, operations in misc.group_by(
+            for _platform, operations in misc.group_by(
                 self.book.operations, "platform"
             ).items():
 

From 6fc09486f12753d1952701194a1c2f6734eec509 Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 31 Dec 2021 17:33:54 +0100
Subject: [PATCH 50/53] formatting

---
 src/graph.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/graph.py b/src/graph.py
index 6630396f..7c235e0c 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -56,7 +56,7 @@ def __init__(
             assert isinstance(markets, list)
             if exchange_id == "kraken":
                 log.warning(
-                    """Kraken is currently not supported due to only supporting 
+                    """Kraken is currently not supported due to only supporting
                         the last 720 candles of historic data"""
                 )
                 continue

From 11d5849a9bc4c2d34048ed2c04a7ef7a4d0bcc24 Mon Sep 17 00:00:00 2001
From: Griffsano <18743559+Griffsano@users.noreply.github.com>
Date: Mon, 10 Jan 2022 10:17:15 +0100
Subject: [PATCH 51/53] Ohlcv update (#4)

* progress bar output: Reduced to one line

* flake8

* variable and file naming

* detailed warning if price already exists in database

* do not preload prices for Kraken
---
 src/book.py       |  6 +++---
 src/price_data.py | 16 +++++++++++++---
 src/taxman.py     |  6 ++++--
 3 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/src/book.py b/src/book.py
index dae7ae8f..cb147ab5 100644
--- a/src/book.py
+++ b/src/book.py
@@ -858,7 +858,7 @@ def read_file(self, file_path: Path) -> None:
             log.info("Reading file from exchange %s at %s", exchange, file_path)
             read_file(file_path)
 
-            cctx_mapping = {
+            ccxt_mapping = {
                 "binance": "binance",
                 "binance_v2": "binance",
                 "coinbase": "coinbasepro",
@@ -868,11 +868,11 @@ def read_file(self, file_path: Path) -> None:
                 "kraken_trades": "kraken",
                 "bitpanda_pro_trades": "bitpanda",
             }
-            api = cctx_mapping.get(exchange)
+            api = ccxt_mapping.get(exchange)
 
             if api not in config.EXCHANGES:
                 log.warning(
-                    f"Exchange `{api}` not found in EXCHANGES API list in config.py. "
+                    f"Exchange `{api}` not found in EXCHANGES API list in config.ini. "
                     "Consider adding it to obtain more accurate price data."
                 )
         else:
diff --git a/src/price_data.py b/src/price_data.py
index a583cce8..82681677 100644
--- a/src/price_data.py
+++ b/src/price_data.py
@@ -594,10 +594,14 @@ def set_price_db(
             if str(e) == f"UNIQUE constraint failed: {tablename}.utc_time":
                 price_db = self.get_price(platform, coin, utc_time, reference_coin)
                 if price != price_db:
+                    rel_error = abs(price - price_db) / price * 100
                     log.warning(
-                        "Tried to write price to database, "
-                        "but a different price exists already."
-                        f"({platform=}, {tablename=}, {utc_time=}, {price=})"
+                        f"Tried to write {tablename} price to database, but a "
+                        f"different price exists already ({platform} @ {utc_time})"
+                    )
+                    log.warning(
+                        f"price: {price}, database price: {price_db}, "
+                        f"relative error: %.6f %%", rel_error
                     )
             else:
                 raise e
@@ -942,6 +946,12 @@ def preload_prices(
         if not operations or coin == reference_coin:
             return
 
+        if platform == "kraken":
+            log.warning(
+                f"Will not preload prices for {platform}, reverting to default API."
+            )
+            return
+
         # Only consider the operations for which we have no prices in the database.
         missing_prices_operations = self.get_missing_price_operations(
             operations, coin, platform, reference_coin
diff --git a/src/taxman.py b/src/taxman.py
index bab37a6d..afed9b92 100644
--- a/src/taxman.py
+++ b/src/taxman.py
@@ -260,8 +260,10 @@ def evaluate_taxation(self) -> None:
                 )
                 self.price_data.preload_prices(s_operations, coin, plat)
                 counter += len(coin_operations)
-                log.info(f"{counter} out of {total_operations} operations processed.")
-                log.info(f"{counter/total_operations*100}% done")
+                log.info(
+                    "{:6.2f} % done, {:6d} out of {:d} operations processed".
+                    format(counter / total_operations * 100, counter, total_operations)
+                )
 
         if config.MULTI_DEPOT:
             # Evaluate taxation separated by platforms and coins.

From 080f475f76984a51124964bf90f527621497be8a Mon Sep 17 00:00:00 2001
From: scientes <34819304+scientes@users.noreply.github.com>
Date: Fri, 28 Jan 2022 11:50:30 +0100
Subject: [PATCH 52/53] unpin ccxt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 011271fd..30ba4a41 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ aiodns==2.0.0
 aiohttp==3.7.4.post0
 async-timeout==3.0.1
 attrs==20.3.0
-ccxt==1.42.7
+ccxt>=1.42.7
 certifi==2020.12.5
 cffi==1.14.5
 chardet==4.0.0

From 4f2dfe74e25c61e7c2d7ee778333980346006611 Mon Sep 17 00:00:00 2001
From: Jeppy <carsten.docktor@gmail.com>
Date: Sun, 6 Feb 2022 13:17:07 +0100
Subject: [PATCH 53/53] UPDATE warning when ccxt mapping is missing for
 exchange - ADD comment

---
 src/book.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/book.py b/src/book.py
index bd9dee41..7880f4eb 100644
--- a/src/book.py
+++ b/src/book.py
@@ -1132,6 +1132,9 @@ def read_file(self, file_path: Path) -> None:
             log.info("Reading file from exchange %s at %s", exchange, file_path)
             read_file(file_path)
 
+            # Check whether the given exchange is "supported" by our ccxt
+            # implementation, by comparing the platform with the listed
+            # ccxt exchanges in our config.
             ccxt_mapping = {
                 "binance": "binance",
                 "binance_v2": "binance",
@@ -1143,8 +1146,12 @@ def read_file(self, file_path: Path) -> None:
                 "bitpanda_pro_trades": "bitpanda",
             }
             api = ccxt_mapping.get(exchange)
-
-            if api not in config.EXCHANGES:
+            if api is None:
+                log.warning(
+                    f"The exchange {exchange} is not mapped to a ccxt exchange. "
+                    "Please add the exchange to the ccxt_mapping dictionary."
+                )
+            elif api not in config.EXCHANGES:
                 log.warning(
                     f"Exchange `{api}` not found in EXCHANGES API list in config.ini. "
                     "Consider adding it to obtain more accurate price data."