From e408e0fbbaef959faaa75a8839182c198848c2d1 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Tue, 9 Jul 2019 16:13:57 +0900
Subject: [PATCH 01/14] #4: Add DAGify method for linearizing the order of
 nodes

---
 src/gfa.py   |  22 ++++++++
 src/graph.py |   9 ++--
 src/sort.py  | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++
 src/test.py  |  19 +++++++
 4 files changed, 185 insertions(+), 5 deletions(-)
 create mode 100644 src/sort.py

diff --git a/src/gfa.py b/src/gfa.py
index f4e64ba..8d84bc9 100644
--- a/src/gfa.py
+++ b/src/gfa.py
@@ -120,6 +120,28 @@ def from_graph(cls, graph: Graph):
             gfa.add_line('\t'.join(['P', path_key, "+,".join(path_values)+"+", ",".join(['*' for _ in path_values])]))
         return cls(gfa)
 
+    @property
+    def to_paths(self) -> List[Path]:
+        node_hash = {}
+        for segment in self.gfa.segments:
+            node_id = segment.name + "+"
+            node = Node(segment.sequence, [])
+            node_hash[node_id] = node
+
+            node_id = segment.name + "-"
+            node = Node(segment.sequence, [])
+            node_hash[node_id] = node
+
+        paths = []
+        for path in self.gfa.paths:
+            nodes = []
+            for node in path.segment_names:
+                node_index = NodeIndex(Node(node_hash[node.name + node.orient].seq, [], node.name), node.orient)
+                nodes.append(node_index)
+            paths.append(Path(path.name, nodes))
+
+        return paths
+
     @property
     def to_graph(self):
         topological_sort_helper = TopologicalSort()
diff --git a/src/graph.py b/src/graph.py
index 68d5001..e2b0af7 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -13,11 +13,12 @@ class NodeMissingError(ValueError):
     pass
 
 class Node:
-    def __init__(self, seq: str, paths: Iterable[int]):
+    def __init__(self, seq: str, paths: Iterable[int], index: int = 0):
         assert isinstance(seq, str), seq
         assert not isinstance(paths, str) and isinstance(paths, Iterable), paths
         self.seq = seq
         self.paths = set(paths)
+        self.index = index
 
     def __len__(self):
         return len(self.paths)
@@ -118,12 +119,10 @@ def __init__(self, name: str, nodes: List[NodeIndex]):
         self.nodes = nodes
         self.position_checkpoints = {}
 
-    def __getitem__(self, i):
-        return self.nodes[i]
-
     def __repr__(self):
         """Warning: the representation strings are very sensitive to whitespace"""
-        return self.nodes.__repr__()
+        #return self.nodes.__repr__()
+        return self.name
 
     def to_gfa(self):
         return '\t'.join(['P', self.name, "+,".join([x.node.name + x.strand for x in self.nodes])+"+", ",".join(['*' for x in self.nodes])])
diff --git a/src/sort.py b/src/sort.py
new file mode 100644
index 0000000..791f4f1
--- /dev/null
+++ b/src/sort.py
@@ -0,0 +1,140 @@
+from src.graph import *
+
+import dataclasses
+
+@dataclasses.dataclass
+class Profile:
+    node: NodeIndex
+    paths: List[Path]
+    duplicate: int = 0
+
+
+class DAGify:
+    def __init__(self, paths: List[Path], nodes = {}):
+        """
+
+        :type paths: List[Path]
+        """
+        self.paths = paths
+        self.nodes = nodes
+        self.profile = []
+
+    # def random_search_to_minimize_node_replication(self):
+
+
+    def recursive_merge(self, primary_path_index: int = 0):
+        profile = []
+        for node_index in self.paths[primary_path_index].nodes:
+            profile.append(Profile(node_index, [self.paths[primary_path_index]], 0))
+        for i, path in enumerate(self.paths):
+            if i == primary_path_index:
+                continue
+            profile = self.lcs(profile, path)
+        return profile
+
+    def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
+        n, m = len(s1), len(s2.nodes)
+        dp = [[0] * (m+1) for _ in range(n+1)]
+
+        for i in range(1, n + 1):
+            for j in range(1, m + 1):
+                if s1[i-1].node == s2.nodes[j-1]:
+                    dp[i][j] = dp[i - 1][j - 1] + 1
+                else:
+                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])
+        i, j = n, m
+        index = []
+        prev = set()
+
+        while i > 0 or j > 0:
+            if s1[i-1].node == s2.nodes[j-1]:
+                prev_paths = s1[i-1].paths
+                prev_paths.append(s2)
+                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node in prev))
+                prev.add(s1[i-1].node)
+                i -= 1
+                j -= 1
+            elif dp[i-1][j] > dp[i][j-1]:
+                prev_paths = s1[i-1].paths
+                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node in prev))
+                prev.add(s1[i-1].node)
+                i -= 1
+            else:
+                index.append(Profile(s2.nodes[j-1], [s2], False))
+                prev.add(s2.nodes[j-1])
+                j -= 1
+
+        while i > 0:
+            prev_paths = s1[i - 1].paths
+            index.append(Profile(s1[i - 1].node, prev_paths, s1[i - 1].node in prev))
+            prev.add(s1[i - 1].node)
+            i -= 1
+
+        while j > 0:
+            prev.add(s2.nodes[j - 1])
+            index.append(Profile(s2.nodes[j - 1], [s2], False))
+            j -= 1
+
+        index.reverse()
+        self.profile = index
+
+        return index
+
+    def to_graph(self):
+        factory_input = []
+        current_slice = Slice([])
+        for prof in self.profile:
+            paths = [x.name for x in prof.paths]
+            if len(prof.paths) == len(self.paths):
+                if len(current_slice.nodes) > 0:
+                    factory_input.append(current_slice)
+                factory_input.append(Slice([Node(prof.node.node.seq, paths, prof.node.node.index)]))
+                current_slice = Slice([])
+            else:
+                all_set = set()
+                for items in [x.paths for x in current_slice.nodes]:
+                    all_set = all_set | items
+                if set(prof.paths) & all_set != set():
+                    if len(current_slice.nodes) > 0:
+                        current_slice.add_node(Node("", set([x.name for x in self.paths]) - all_set))
+                        factory_input.append(current_slice)
+                    current_slice = Slice([Node(prof.node.node.seq, paths, prof.node.node.index)])
+                else:
+                    current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.index))
+
+        base_graph = Graph.load_from_slices(factory_input)
+        print(factory_input)
+        return base_graph
+
+    def merge(A: List[NodeIndex], B: List[NodeIndex]):
+        pos, merged = [], []
+        pi, pj, prev = 0, 0, set()
+        for i in range(len(A)):
+            for j in range(len(B)):
+                if pi <= i and pj <= j and A[i] == B[j]:
+                    curr = set()
+                    while pi < i:
+                        curr.add(A[pi])
+                        pos.append( (pi, -1, A[pi] in prev) )
+                        merged.append(A[pi])
+                        pi += 1
+                    while pj < j:
+                        curr.add(B[pj])
+                        pos.append( (-1, pj, B[pj] in prev) )
+                        merged.append(B[pj])
+                        pj += 1
+                    if i == pi and j == pj:
+                        pos.append((i, j, False))
+                        merged.append(A[i])
+                        pi += 1
+                        pj += 1
+                    prev |= curr
+        while pi < len(A):
+            pos.append( (pi, -1, A[pi] in prev) )
+            merged.append(A[pi])
+            pi += 1
+        while pj < len(B):
+            pos.append( (-1, pj, B[pj] in prev) )
+            merged.append(B[pj])
+            pj += 1
+        return pos, merged
\ No newline at end of file
diff --git a/src/test.py b/src/test.py
index 7e62f8f..b80b3f6 100644
--- a/src/test.py
+++ b/src/test.py
@@ -1,6 +1,7 @@
 import unittest
 from src.gfa import GFA
 from src.graph import Graph, Slice, Node, NoAnchorError, PathOverlapError, NoOverlapError, NodeMissingError
+from src.sort import DAGify
 
 def G(rep):
     """Short hand for Graph construction that returns a slice"""
@@ -56,6 +57,24 @@ def test_G(self):
             G([['C', {1, 2, 3, 4}], ['T', {12, 16}]])
 
 
+class DAGifyTest(unittest.TestCase):
+    """ test class of gfa.py
+    """
+
+    def test_dagify(self):
+        gfa = GFA.load_from_gfa("../test/test.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        dagify.recursive_merge(0)
+        graph = dagify.to_graph()
+
+        graph_by_toplogical_sort = gfa.to_graph
+        x = 'x'
+        y = 'y'
+        z = 'z'
+        self.assertEqual(graph, graph_by_toplogical_sort)
+
+
 class GFATest(unittest.TestCase):
     """ test class of gfa.py
     """

From 8e0faab24ae5097ae4e8f9ad9e3b4b2aeef63570 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Tue, 9 Jul 2019 16:36:53 +0900
Subject: [PATCH 02/14] #4: Debug of dp conditions

---
 src/sort.py    | 17 +++++++++--------
 src/test.py    | 12 ++++++++++++
 test/test2.gfa |  2 +-
 test/test3.gfa | 38 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 9 deletions(-)
 create mode 100644 test/test3.gfa

diff --git a/src/sort.py b/src/sort.py
index 791f4f1..9b05112 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -46,28 +46,28 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
         index = []
         prev = set()
 
-        while i > 0 or j > 0:
+        while i > 0 and j > 0:
             if s1[i-1].node == s2.nodes[j-1]:
                 prev_paths = s1[i-1].paths
                 prev_paths.append(s2)
-                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node in prev))
-                prev.add(s1[i-1].node)
+                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node.node.index in prev))
+                prev.add(s1[i-1].node.node.index)
                 i -= 1
                 j -= 1
             elif dp[i-1][j] > dp[i][j-1]:
                 prev_paths = s1[i-1].paths
-                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node in prev))
-                prev.add(s1[i-1].node)
+                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node.node.index in prev))
+                prev.add(s1[i-1].node.node.index)
                 i -= 1
             else:
                 index.append(Profile(s2.nodes[j-1], [s2], False))
-                prev.add(s2.nodes[j-1])
+                prev.add(s2.nodes[j-1].node.index)
                 j -= 1
 
         while i > 0:
             prev_paths = s1[i - 1].paths
-            index.append(Profile(s1[i - 1].node, prev_paths, s1[i - 1].node in prev))
-            prev.add(s1[i - 1].node)
+            index.append(Profile(s1[i - 1].node, prev_paths, s1[i - 1].node.node.index in prev))
+            prev.add(s1[i - 1].node.node.index)
             i -= 1
 
         while j > 0:
@@ -83,6 +83,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
     def to_graph(self):
         factory_input = []
         current_slice = Slice([])
+        print(self.profile)
         for prof in self.profile:
             paths = [x.name for x in prof.paths]
             if len(prof.paths) == len(self.paths):
diff --git a/src/test.py b/src/test.py
index b80b3f6..36c525a 100644
--- a/src/test.py
+++ b/src/test.py
@@ -74,6 +74,18 @@ def test_dagify(self):
         z = 'z'
         self.assertEqual(graph, graph_by_toplogical_sort)
 
+    def test_dagify2(self):
+        gfa = GFA.load_from_gfa("../test/test2.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        dagify.recursive_merge(0)
+        graph = dagify.to_graph()
+
+        graph_by_toplogical_sort = gfa.to_graph
+        x = 'x'
+        y = 'y'
+        z = 'z'
+        self.assertEqual(graph, graph_by_toplogical_sort)
 
 class GFATest(unittest.TestCase):
     """ test class of gfa.py
diff --git a/test/test2.gfa b/test/test2.gfa
index 52164c1..3d206ef 100644
--- a/test/test2.gfa
+++ b/test/test2.gfa
@@ -2,7 +2,7 @@ H	VN:Z:1.0
 P	x	1+,3+,5+,6+,8+,9+,11+,12+,14+,15+	*,*,*,*,*,*,*,*,*
 P	y	1+,2+,5+,6+,8+,9+,11+,12+,14+,15+	*,*,*,*,*,*,*,*,*
 P	z	1+,2+,4+,6+,7+,9+,10+,12+,14+,15+	*,*,*,*,*,*,*,*,*
-P	a	12+,13+,15+	*,*
+P	a	7+,9+,10+	*,*
 S	1	CAAATAAG
 L	1	+	2	+	0M
 L	1	+	3	+	0M
diff --git a/test/test3.gfa b/test/test3.gfa
new file mode 100644
index 0000000..705218b
--- /dev/null
+++ b/test/test3.gfa
@@ -0,0 +1,38 @@
+H	VN:Z:1.0
+P	x	1+,3+,5+,6+,8+,9+,11+,12+,14+,15+	*,*,*,*,*,*,*,*,*
+P	y	1+,15+,5+,6+,8+,9+,11+,12+,14+,15+	*,*,*,*,*,*,*,*,*
+S	1	CAAATAAG
+L	1	+	15	+	0M
+L	1	+	3	+	0M
+S	2	A
+L	2	+	4	+	0M
+L	15	+	5	+	0M
+S	3	G
+L	3	+	4	+	0M
+L	3	+	5	+	0M
+S	4	T
+L	4	+	6	+	0M
+S	5	C
+L	5	+	6	+	0M
+S	6	TTG
+L	6	+	7	+	0M
+L	6	+	8	+	0M
+S	7	A
+L	7	+	9	+	0M
+S	8	G
+L	8	+	9	+	0M
+S	9	AAATTTTCTGGAGTTCTAT
+L	9	+	10	+	0M
+L	9	+	11	+	0M
+S	10	A
+L	10	+	12	+	0M
+S	11	T
+L	11	+	12	+	0M
+S	12	ATAT
+L	12	+	13	+	0M
+L	12	+	14	+	0M
+S	13	A
+L	13	+	15	+	0M
+S	14	T
+L	14	+	15	+	0M
+S	15	CCAACTCTCTG

From b88f9a12edc2735c587a807efa60801e67babf99 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Tue, 9 Jul 2019 23:01:32 +0900
Subject: [PATCH 03/14] #4: Add candidate_paths for storing the tips

---
 src/sort.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/sort.py b/src/sort.py
index 9b05112..68447d9 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -6,6 +6,7 @@
 class Profile:
     node: NodeIndex
     paths: List[Path]
+    candidate_paths: List[Path]
     duplicate: int = 0
 
 
@@ -25,7 +26,7 @@ def __init__(self, paths: List[Path], nodes = {}):
     def recursive_merge(self, primary_path_index: int = 0):
         profile = []
         for node_index in self.paths[primary_path_index].nodes:
-            profile.append(Profile(node_index, [self.paths[primary_path_index]], 0))
+            profile.append(Profile(node_index, [self.paths[primary_path_index]], [self.paths[primary_path_index]], 0))
         for i, path in enumerate(self.paths):
             if i == primary_path_index:
                 continue
@@ -50,6 +51,8 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
             if s1[i-1].node == s2.nodes[j-1]:
                 prev_paths = s1[i-1].paths
                 prev_paths.append(s2)
+                candidate_paths = s1[i-1].candidate_paths
+
                 index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node.node.index in prev))
                 prev.add(s1[i-1].node.node.index)
                 i -= 1

From 88d1a7b8e9d1ee11f4d2fa8b5f3697d39a60a31e Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Wed, 10 Jul 2019 12:58:21 +0900
Subject: [PATCH 04/14] #4: Fix dagify method for graphs that have a
 duplication (WIP)

---
 src/sort.py | 90 +++++++++++++++++++++++------------------------------
 src/test.py | 15 +++++++--
 2 files changed, 51 insertions(+), 54 deletions(-)

diff --git a/src/sort.py b/src/sort.py
index 68447d9..5de1472 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -6,8 +6,8 @@
 class Profile:
     node: NodeIndex
     paths: List[Path]
-    candidate_paths: List[Path]
-    duplicate: int = 0
+    candidate_paths: set()
+    duplicate: bool = False
 
 
 class DAGify:
@@ -20,13 +20,20 @@ def __init__(self, paths: List[Path], nodes = {}):
         self.nodes = nodes
         self.profile = []
 
-    # def random_search_to_minimize_node_replication(self):
-
-
-    def recursive_merge(self, primary_path_index: int = 0):
+    def search_for_minimizing_replications(self) -> (List[Profile], int):
+        min_rep = len(self.nodes)
+        profile = []
+        for i, _ in enumerate(self.paths):
+            profile_candidate = self.recursive_merge(i)
+            if min_rep > sum([x for x in profile if x]):
+                min_rep = sum([x for x in profile if x])
+                profile = profile_candidate
+        return profile, min_rep
+
+    def recursive_merge(self, primary_path_index: int = 0) -> List[Profile]:
         profile = []
         for node_index in self.paths[primary_path_index].nodes:
-            profile.append(Profile(node_index, [self.paths[primary_path_index]], [self.paths[primary_path_index]], 0))
+            profile.append(Profile(node_index, [self.paths[primary_path_index]], {self.paths[primary_path_index].name}, 0))
         for i, path in enumerate(self.paths):
             if i == primary_path_index:
                 continue
@@ -46,36 +53,48 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
         i, j = n, m
         index = []
         prev = set()
+        candidate_path_flag = False
 
         while i > 0 and j > 0:
             if s1[i-1].node == s2.nodes[j-1]:
                 prev_paths = s1[i-1].paths
                 prev_paths.append(s2)
                 candidate_paths = s1[i-1].candidate_paths
+                candidate_paths.add(s2.name)
+                candidate_path_flag = True
 
-                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node.node.index in prev))
+                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.index in prev))
                 prev.add(s1[i-1].node.node.index)
                 i -= 1
                 j -= 1
             elif dp[i-1][j] > dp[i][j-1]:
                 prev_paths = s1[i-1].paths
-                index.append(Profile(s1[i-1].node, prev_paths, s1[i-1].node.node.index in prev))
+                candidate_paths = s1[i-1].candidate_paths
+                if candidate_path_flag:
+                    candidate_paths.add(s2.name)
+                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.index in prev))
                 prev.add(s1[i-1].node.node.index)
                 i -= 1
             else:
-                index.append(Profile(s2.nodes[j-1], [s2], False))
+                candidate_paths = {s2.name}
+                if s1[i]:
+                    candidate_paths |= s1[i].candidate_paths
+                if s1[i-1]:
+                    candidate_paths |= s1[i-1].candidate_paths
+                index.append(Profile(s2.nodes[j-1], [s2], candidate_paths, False))
                 prev.add(s2.nodes[j-1].node.index)
                 j -= 1
 
         while i > 0:
             prev_paths = s1[i - 1].paths
-            index.append(Profile(s1[i - 1].node, prev_paths, s1[i - 1].node.node.index in prev))
+            prev_candidates = s1[i-1].candidate_paths
+            index.append(Profile(s1[i - 1].node, prev_paths, prev_candidates, s1[i - 1].node.node.index in prev))
             prev.add(s1[i - 1].node.node.index)
             i -= 1
 
         while j > 0:
             prev.add(s2.nodes[j - 1])
-            index.append(Profile(s2.nodes[j - 1], [s2], False))
+            index.append(Profile(s2.nodes[j - 1], [s2], {s2.name}, False))
             j -= 1
 
         index.reverse()
@@ -86,10 +105,10 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
     def to_graph(self):
         factory_input = []
         current_slice = Slice([])
-        print(self.profile)
+        # print(self.profile)
         for prof in self.profile:
             paths = [x.name for x in prof.paths]
-            if len(prof.paths) == len(self.paths):
+            if len(prof.paths) == len(prof.candidate_paths):
                 if len(current_slice.nodes) > 0:
                     factory_input.append(current_slice)
                 factory_input.append(Slice([Node(prof.node.node.seq, paths, prof.node.node.index)]))
@@ -97,48 +116,17 @@ def to_graph(self):
             else:
                 all_set = set()
                 for items in [x.paths for x in current_slice.nodes]:
-                    all_set = all_set | items
-                if set(prof.paths) & all_set != set():
+                    all_set |= items
+                # print(all_set, prof.candidate_paths, prof.paths, set([x.name for x in prof.paths]) & all_set)
+                if set([x.name for x in prof.paths])  & all_set != set():
                     if len(current_slice.nodes) > 0:
-                        current_slice.add_node(Node("", set([x.name for x in self.paths]) - all_set))
+                        if prof.candidate_paths - all_set != set():
+                            current_slice.add_node(Node("", prof.candidate_paths - all_set))
                         factory_input.append(current_slice)
                     current_slice = Slice([Node(prof.node.node.seq, paths, prof.node.node.index)])
                 else:
                     current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.index))
 
         base_graph = Graph.load_from_slices(factory_input)
-        print(factory_input)
+        # print(factory_input)
         return base_graph
-
-    def merge(A: List[NodeIndex], B: List[NodeIndex]):
-        pos, merged = [], []
-        pi, pj, prev = 0, 0, set()
-        for i in range(len(A)):
-            for j in range(len(B)):
-                if pi <= i and pj <= j and A[i] == B[j]:
-                    curr = set()
-                    while pi < i:
-                        curr.add(A[pi])
-                        pos.append( (pi, -1, A[pi] in prev) )
-                        merged.append(A[pi])
-                        pi += 1
-                    while pj < j:
-                        curr.add(B[pj])
-                        pos.append( (-1, pj, B[pj] in prev) )
-                        merged.append(B[pj])
-                        pj += 1
-                    if i == pi and j == pj:
-                        pos.append((i, j, False))
-                        merged.append(A[i])
-                        pi += 1
-                        pj += 1
-                    prev |= curr
-        while pi < len(A):
-            pos.append( (pi, -1, A[pi] in prev) )
-            merged.append(A[pi])
-            pi += 1
-        while pj < len(B):
-            pos.append( (-1, pj, B[pj] in prev) )
-            merged.append(B[pj])
-            pj += 1
-        return pos, merged
\ No newline at end of file
diff --git a/src/test.py b/src/test.py
index 36c525a..d366df6 100644
--- a/src/test.py
+++ b/src/test.py
@@ -58,7 +58,7 @@ def test_G(self):
 
 
 class DAGifyTest(unittest.TestCase):
-    """ test class of gfa.py
+    """ test class of sort.py
     """
 
     def test_dagify(self):
@@ -81,11 +81,20 @@ def test_dagify2(self):
         dagify.recursive_merge(0)
         graph = dagify.to_graph()
 
-        graph_by_toplogical_sort = gfa.to_graph
+        a = 'a'
         x = 'x'
         y = 'y'
         z = 'z'
-        self.assertEqual(graph, graph_by_toplogical_sort)
+        self.assertEqual(graph, [['CAAATAAG', {x, y, z}], ['G', {x}, 'A', {y, z}], ['C', {x, y}, 'T', {z}], ['TTG', {x, y, z}], ['G', {x, y}, 'A', {a, z}], ['AAATTTTCTGGAGTTCTAT', {a, x, y, z}], ['A', {a, z}, 'T', {x, y}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]])
+
+    def test_dagify3(self):
+        gfa = GFA.load_from_gfa("../test/test3.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        dagify.recursive_merge(0)
+        graph = dagify.to_graph()
+        print(graph)
+
 
 class GFATest(unittest.TestCase):
     """ test class of gfa.py

From 00148e91cc3d28b7db3d9414cd9cde0097bed417 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Wed, 10 Jul 2019 17:34:48 +0900
Subject: [PATCH 05/14] #4: Fix dagify method for graphs that have
 alternative paths

---
 src/sort.py                                   | 20 ++++-----
 src/test.py                                   | 44 +++++++++++++------
 ...ernately_paths.gfa => alternate_paths.gfa} |  0
 3 files changed, 40 insertions(+), 24 deletions(-)
 rename test/{alternately_paths.gfa => alternate_paths.gfa} (100%)

diff --git a/src/sort.py b/src/sort.py
index 5de1472..003b181 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -9,31 +9,32 @@ class Profile:
     candidate_paths: set()
     duplicate: bool = False
 
+    def __repr__(self):
+        return "["+str(self.node.node) + str(self.paths)+"]"
 
 class DAGify:
-    def __init__(self, paths: List[Path], nodes = {}):
+    def __init__(self, paths: List[Path], nodes={}):
         """
 
         :type paths: List[Path]
         """
         self.paths = paths
         self.nodes = nodes
-        self.profile = []
 
     def search_for_minimizing_replications(self) -> (List[Profile], int):
-        min_rep = len(self.nodes)
+        min_rep = sys.maxsize
         profile = []
         for i, _ in enumerate(self.paths):
             profile_candidate = self.recursive_merge(i)
-            if min_rep > sum([x for x in profile if x]):
-                min_rep = sum([x for x in profile if x])
+            if min_rep > len([x.duplicate for x in profile_candidate if x.duplicate]):
+                min_rep = len([x.duplicate for x in profile_candidate if x.duplicate])
                 profile = profile_candidate
         return profile, min_rep
 
     def recursive_merge(self, primary_path_index: int = 0) -> List[Profile]:
         profile = []
         for node_index in self.paths[primary_path_index].nodes:
-            profile.append(Profile(node_index, [self.paths[primary_path_index]], {self.paths[primary_path_index].name}, 0))
+            profile.append(Profile(node_index, [self.paths[primary_path_index]], {self.paths[primary_path_index].name}, False))
         for i, path in enumerate(self.paths):
             if i == primary_path_index:
                 continue
@@ -81,7 +82,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
                     candidate_paths |= s1[i].candidate_paths
                 if s1[i-1]:
                     candidate_paths |= s1[i-1].candidate_paths
-                index.append(Profile(s2.nodes[j-1], [s2], candidate_paths, False))
+                index.append(Profile(s2.nodes[j-1], [s2], candidate_paths, s2.nodes[j-1].node.index in prev))
                 prev.add(s2.nodes[j-1].node.index)
                 j -= 1
 
@@ -98,15 +99,14 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
             j -= 1
 
         index.reverse()
-        self.profile = index
 
         return index
 
-    def to_graph(self):
+    def to_graph(self, profile: List[Profile]):
         factory_input = []
         current_slice = Slice([])
         # print(self.profile)
-        for prof in self.profile:
+        for prof in profile:
             paths = [x.name for x in prof.paths]
             if len(prof.paths) == len(prof.candidate_paths):
                 if len(current_slice.nodes) > 0:
diff --git a/src/test.py b/src/test.py
index d366df6..e8a5061 100644
--- a/src/test.py
+++ b/src/test.py
@@ -56,11 +56,15 @@ def test_G(self):
         with self.assertRaises(ValueError):
             G([['C', {1, 2, 3, 4}], ['T', {12, 16}]])
 
-
+a = 'a'
+x = 'x'
+y = 'y'
+z = 'z'
 class DAGifyTest(unittest.TestCase):
     """ test class of sort.py
     """
 
+
     def test_dagify(self):
         gfa = GFA.load_from_gfa("../test/test.gfa")
         paths = gfa.to_paths
@@ -69,31 +73,43 @@ def test_dagify(self):
         graph = dagify.to_graph()
 
         graph_by_toplogical_sort = gfa.to_graph
-        x = 'x'
-        y = 'y'
-        z = 'z'
         self.assertEqual(graph, graph_by_toplogical_sort)
 
     def test_dagify2(self):
         gfa = GFA.load_from_gfa("../test/test2.gfa")
         paths = gfa.to_paths
         dagify = DAGify(paths)
-        dagify.recursive_merge(0)
-        graph = dagify.to_graph()
-
-        a = 'a'
-        x = 'x'
-        y = 'y'
-        z = 'z'
+        profile = dagify.recursive_merge(0)
+        graph = dagify.to_graph(profile)
         self.assertEqual(graph, [['CAAATAAG', {x, y, z}], ['G', {x}, 'A', {y, z}], ['C', {x, y}, 'T', {z}], ['TTG', {x, y, z}], ['G', {x, y}, 'A', {a, z}], ['AAATTTTCTGGAGTTCTAT', {a, x, y, z}], ['A', {a, z}, 'T', {x, y}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]])
 
     def test_dagify3(self):
         gfa = GFA.load_from_gfa("../test/test3.gfa")
         paths = gfa.to_paths
         dagify = DAGify(paths)
-        dagify.recursive_merge(0)
-        graph = dagify.to_graph()
-        print(graph)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual(rep_count, 1)
+        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['CCAACTCTCTG', {y}, 'G', {x}], ['C', {x, y}], ['TTG', {x, y}], ['G', {x, y}], ['AAATTTTCTGGAGTTCTAT', {x, y}], ['T', {x, y}], ['ATAT', {x, y}], ['T', {x, y}], ['CCAACTCTCTG', {x, y}]])
+
+    def test_dagify_altpath(self):
+        gfa = GFA.load_from_gfa("../test/alternate_paths.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual(rep_count, 1)
+        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['A', {x}], ['G', {x, y}], ['A', {y}], ['T', {x, y}]])
+
+    def test_dagify_dup(self):
+        gfa = GFA.load_from_gfa("../test/duplicate.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual(rep_count, 2)
+        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['', {x}, 'A', {y}], ['G', {y}], ['A', {x, y}], ['G', {x, y}], ['T', {x, y}]])
+
 
 
 class GFATest(unittest.TestCase):
diff --git a/test/alternately_paths.gfa b/test/alternate_paths.gfa
similarity index 100%
rename from test/alternately_paths.gfa
rename to test/alternate_paths.gfa

From c9da4b99ea01ac82291c9d20db4697d87c11e505 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Wed, 17 Jul 2019 22:44:34 +0900
Subject: [PATCH 06/14] #13: Fix class definition (WIP)

---
 src/gfa.py   |  4 ++--
 src/graph.py | 15 +++++++++----
 src/sort.py  | 59 +++++++++++++++++++++++++++++-----------------------
 src/test.py  | 16 +++++++-------
 4 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/src/gfa.py b/src/gfa.py
index 0f35741..3195e73 100644
--- a/src/gfa.py
+++ b/src/gfa.py
@@ -131,7 +131,7 @@ def to_paths(self) -> List[Path]:
         for path in self.gfa.paths:
             nodes = []
             for node in path.segment_names:
-                node_index = NodeIndex(Node(node_hash[node.name + node.orient].seq, [], node.name), node.orient)
+                node_index = NodeTraversal(Node(node_hash[node.name + node.orient].seq, [], node.name), node.orient)
                 nodes.append(node_index)
             paths.append(Path(path.name, nodes))
 
@@ -196,7 +196,7 @@ def to_graph(self):
                 else:
                     current_slice.add_node(node_hash[node])
 
-        base_graph = Graph.load_from_slices(factory_input)
+        base_graph = SlicedGraph.load_from_slices(factory_input, self.gfa.paths)
         return base_graph
 
 
diff --git a/src/graph.py b/src/graph.py
index 1a9f21d..1212995 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -125,9 +125,9 @@ class Path:
     was sequenced.  A path visits a series of nodes and the ordered concatenation of the node
     sequences is the accession's genome.  Create Paths first from accession names, then append
     them to Nodes to link together."""
-    def __init__(self, accession: str):
+    def __init__(self, accession: str, nodes = []):
         self.accession = accession  # one path per accessions
-        self.nodes = [] # List[NodeTraversal]
+        self.nodes = nodes # List[NodeTraversal]
         self.position_checkpoints = {}  # TODO: currently not used
 
     def __getitem__(self, path_index):
@@ -150,6 +150,9 @@ def append_node(self, node: Node, strand: str):
         node.paths.add(PathIndex(self, len(self.nodes)-1))  # already appended node
         return node
 
+    def name(self):
+        return self.accession
+
     def to_gfa(self):
         return '\t'.join(['P', self.accession, "+,".join([x.node.name + x.strand for x in self.nodes]) + "+", ",".join(['*' for x in self.nodes])])
 
@@ -165,7 +168,7 @@ def __repr__(self):
         return repr(self.path.accession)
 
     def __eq__(self, other):
-        if self.path.accession == other.path.accession and self.index == other.index:
+        if self.path.accession == other.path.accession: # and self.index == other.index:
             return True
         else:
             return False
@@ -174,7 +177,7 @@ def __lt__(self, other):
         return self.path.accession < other.path.accession
 
     def __hash__(self):
-        return hash(self.path.accession) * (self.index if self.index else 1)
+        return hash(self.path.accession)  # * (self.index if self.index else 1)
 
 
 class NodeTraversal:
@@ -186,6 +189,9 @@ def __init__(self, node: Node, strand: str = '+'):
     def __repr__(self):
         return self.node.seq
 
+    def __eq__(self, other):
+        return self.node.id == other.node.id
+
 
 class Graph:
     def __init__(self, paths: Iterable = None):
@@ -250,6 +256,7 @@ def __init__(self, paths):
             self.compute_slices()
 
     def __eq__(self, representation):
+        print(self,representation)
         if isinstance(representation, SlicedGraph):
             return all(slice_a == slice_b for slice_a, slice_b in zip_longest(self.slices, representation.slices))
         return self == SlicedGraph.build(representation)  # build a graph then compare it
diff --git a/src/sort.py b/src/sort.py
index 003b181..ad41c60 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -4,7 +4,7 @@
 
 @dataclasses.dataclass
 class Profile:
-    node: NodeIndex
+    node: NodeTraversal
     paths: List[Path]
     candidate_paths: set()
     duplicate: bool = False
@@ -34,7 +34,7 @@ def search_for_minimizing_replications(self) -> (List[Profile], int):
     def recursive_merge(self, primary_path_index: int = 0) -> List[Profile]:
         profile = []
         for node_index in self.paths[primary_path_index].nodes:
-            profile.append(Profile(node_index, [self.paths[primary_path_index]], {self.paths[primary_path_index].name}, False))
+            profile.append(Profile(node_index, [self.paths[primary_path_index]], {self.paths[primary_path_index]}, False))
         for i, path in enumerate(self.paths):
             if i == primary_path_index:
                 continue
@@ -55,47 +55,49 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
         index = []
         prev = set()
         candidate_path_flag = False
+#        print(s1., s2.nodes)
 
         while i > 0 and j > 0:
             if s1[i-1].node == s2.nodes[j-1]:
                 prev_paths = s1[i-1].paths
                 prev_paths.append(s2)
                 candidate_paths = s1[i-1].candidate_paths
-                candidate_paths.add(s2.name)
+                candidate_paths.add(s2)
                 candidate_path_flag = True
 
-                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.index in prev))
-                prev.add(s1[i-1].node.node.index)
+                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.id in prev))
+                prev.add(s1[i-1].node.node.id)
                 i -= 1
                 j -= 1
             elif dp[i-1][j] > dp[i][j-1]:
                 prev_paths = s1[i-1].paths
                 candidate_paths = s1[i-1].candidate_paths
                 if candidate_path_flag:
-                    candidate_paths.add(s2.name)
-                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.index in prev))
-                prev.add(s1[i-1].node.node.index)
+                    candidate_paths.add(s2)
+                index.append(Profile(s1[i-1].node, prev_paths, candidate_paths, s1[i-1].node.node.id in prev))
+                prev.add(s1[i-1].node.node.id)
                 i -= 1
             else:
-                candidate_paths = {s2.name}
+                candidate_paths = {s2}
                 if s1[i]:
                     candidate_paths |= s1[i].candidate_paths
                 if s1[i-1]:
                     candidate_paths |= s1[i-1].candidate_paths
-                index.append(Profile(s2.nodes[j-1], [s2], candidate_paths, s2.nodes[j-1].node.index in prev))
-                prev.add(s2.nodes[j-1].node.index)
+                index.append(Profile(s2.nodes[j-1], [s2], candidate_paths, s2.nodes[j-1].node.id in prev))
+                prev.add(s2.nodes[j-1].node.id)
                 j -= 1
 
         while i > 0:
             prev_paths = s1[i - 1].paths
             prev_candidates = s1[i-1].candidate_paths
-            index.append(Profile(s1[i - 1].node, prev_paths, prev_candidates, s1[i - 1].node.node.index in prev))
-            prev.add(s1[i - 1].node.node.index)
+            index.append(Profile(s1[i - 1].node, prev_paths, prev_candidates, s1[i - 1].node.node.id in prev))
+            prev.add(s1[i - 1].node.node.id)
             i -= 1
 
         while j > 0:
-            prev.add(s2.nodes[j - 1])
-            index.append(Profile(s2.nodes[j - 1], [s2], {s2.name}, False))
+            print(s2.nodes[j - 1], type(s2.nodes[j - 1]))
+            prev.add(s2.nodes[j - 1].node.id)
+            index.append(Profile(s2.nodes[j - 1], [s2], {s2}, False))
             j -= 1
 
         index.reverse()
@@ -105,28 +107,33 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
     def to_graph(self, profile: List[Profile]):
         factory_input = []
         current_slice = Slice([])
-        # print(self.profile)
+        current_paths = []
         for prof in profile:
-            paths = [x.name for x in prof.paths]
+            paths = [x for x in prof.paths]
             if len(prof.paths) == len(prof.candidate_paths):
                 if len(current_slice.nodes) > 0:
                     factory_input.append(current_slice)
-                factory_input.append(Slice([Node(prof.node.node.seq, paths, prof.node.node.index)]))
+                factory_input.append(Slice([Node(prof.node.node.seq, paths, prof.node.node.id)]))
                 current_slice = Slice([])
+                current_paths = []
             else:
+                all_path_set = set([x for x in current_paths])
                 all_set = set()
-                for items in [x.paths for x in current_slice.nodes]:
-                    all_set |= items
+                for items in [x.paths for x in current_slice]:
+                     items = set(items) #print(type(list(items)[0]))
+                     all_set |= items
                 # print(all_set, prof.candidate_paths, prof.paths, set([x.name for x in prof.paths]) & all_set)
-                if set([x.name for x in prof.paths])  & all_set != set():
+                if set([x for x in prof.paths]) & all_path_set != set():
                     if len(current_slice.nodes) > 0:
-                        if prof.candidate_paths - all_set != set():
-                            current_slice.add_node(Node("", prof.candidate_paths - all_set))
+                        if prof.candidate_paths - all_path_set != set():
+                            current_slice.add_node(Node("", prof.candidate_paths - all_path_set))
                         factory_input.append(current_slice)
-                    current_slice = Slice([Node(prof.node.node.seq, paths, prof.node.node.index)])
+                    current_slice = Slice([Node(prof.node.node.seq, paths, prof.node.node.id)])
+                    current_paths = paths
                 else:
-                    current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.index))
+                    current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.id))
+                    current_paths.extend(paths)
 
-        base_graph = Graph.load_from_slices(factory_input)
+        base_graph = SlicedGraph.load_from_slices(factory_input, self.paths)
         # print(factory_input)
         return base_graph
diff --git a/src/test.py b/src/test.py
index c51e431..ae564de 100644
--- a/src/test.py
+++ b/src/test.py
@@ -107,11 +107,8 @@ def pf(wd, path):
 
 # Define several test example directories
 PATH_TO_TEST_DATA = pf(WD, "test/")
+x,y,z,a = 'x', 'y', 'z', 'a'
 
-a = 'a'
-x = 'x'
-y = 'y'
-z = 'z'
 class DAGifyTest(unittest.TestCase):
     """ test class of sort.py
     """
@@ -121,11 +118,11 @@ def test_dagify(self):
         gfa = GFA.load_from_gfa("../test/test.gfa")
         paths = gfa.to_paths
         dagify = DAGify(paths)
-        dagify.recursive_merge(0)
-        graph = dagify.to_graph()
+        profile = dagify.recursive_merge(0)
+        graph = dagify.to_graph(profile)
+#        x, y, z = graph.paths['x'], graph.paths['y'], graph.paths['z']
 
-        graph_by_toplogical_sort = gfa.to_graph
-        self.assertEqual(graph, graph_by_toplogical_sort)
+        self.assertEqual([['CAAATAAG', {x,y,z}], ['A', {y,z}, 'G', {x}], ['C', {x,y,z}], ['TTG', {x,y,z}], ['A', {z}, 'G', {x,y}], ['AAATTTTCTGGAGTTCTAT', {x,y,z}], ['T', {x,y,z}], ['ATAT', {x,y,z}], ['T', {x,y,z}], ['CCAACTCTCTG', {x,y,z}]], graph)
 
     def test_dagify2(self):
         gfa = GFA.load_from_gfa("../test/test2.gfa")
@@ -133,7 +130,8 @@ def test_dagify2(self):
         dagify = DAGify(paths)
         profile = dagify.recursive_merge(0)
         graph = dagify.to_graph(profile)
-        self.assertEqual(graph, [['CAAATAAG', {x, y, z}], ['G', {x}, 'A', {y, z}], ['C', {x, y}, 'T', {z}], ['TTG', {x, y, z}], ['G', {x, y}, 'A', {a, z}], ['AAATTTTCTGGAGTTCTAT', {a, x, y, z}], ['A', {a, z}, 'T', {x, y}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]])
+        x,y,z,a = 'x', 'y', 'z', 'a'
+        self.assertEqual([['CAAATAAG', {x, y, z}], ['G', {x}, 'A', {y, z}], ['C', {x, y}, 'T', {z}], ['TTG', {x, y, z}], ['G', {x, y}, 'A', {a, z}], ['AAATTTTCTGGAGTTCTAT', {a, x, y, z}], ['A', {a, z}, 'T', {x, y}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]], graph)
 
     def test_dagify3(self):
         gfa = GFA.load_from_gfa("../test/test3.gfa")

From 6db2e7ce595fe3e6aac525da5d17f11e5a6e5e2d Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 12:02:35 +0900
Subject: [PATCH 07/14] #13: Update compute_slices to use DAGify (WIP)

---
 src/graph.py | 15 ++++++++++++++-
 src/test.py  | 11 +++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/src/graph.py b/src/graph.py
index 1212995..fa526b7 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -246,6 +246,9 @@ def compute_slices(self):
         return SlicedGraph.from_graph(self)
 
 
+from sort import DAGify
+
+
 class SlicedGraph(Graph):
     def __init__(self, paths):
         super(SlicedGraph, self).__init__(paths)
@@ -256,7 +259,6 @@ def __init__(self, paths):
             self.compute_slices()
 
     def __eq__(self, representation):
-        print(self,representation)
         if isinstance(representation, SlicedGraph):
             return all(slice_a == slice_b for slice_a, slice_b in zip_longest(self.slices, representation.slices))
         return self == SlicedGraph.build(representation)  # build a graph then compare it
@@ -286,6 +288,16 @@ def compute_slices(self):
             self.slices.append(Slice([node]))
         return self
 
+    def compute_slices_by_dagify(self):
+        """This method uses DAGify algorithm to compute slices."""
+        if not self.paths:
+            return self
+        dagify = DAGify(self.paths)
+        profile = dagify.recursive_merge(0)
+        graph = dagify.to_graph(profile)
+        self.slices = graph.slices
+        return self
+
     @staticmethod
     def build(cmd):
         """This factory uses existing slice declarations to build a graph with Paths populated in the order
@@ -296,6 +308,7 @@ def build(cmd):
         # preemptively grab all the path names from every odd list entry
         paths = {key for sl in cmd for i in range(0, len(sl), 2) for key in sl[i + 1]}
         graph = SlicedGraph(paths)
+        graph.slices = []
         for sl in cmd:
             current_slice = []
             if isinstance(sl, Slice):
diff --git a/src/test.py b/src/test.py
index ae564de..9dee219 100644
--- a/src/test.py
+++ b/src/test.py
@@ -192,6 +192,17 @@ def test_gfa_to_sliced_graph(self):
         print(slices)
         self.assertEqual(slices, [['CAAATAAG', {x, y, z}], ['A', {y, z}, 'G', {x}], ['C', {x, y, z}], ['TTG', {x, y, z}], ['A', {z}, 'G', {x, y}], ['AAATTTTCTGGAGTTCTAT', {x, y, z}], ['T', {x, y, z}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]])
 
+    def test_gfa_to_sliced_graph_via_dagify(self):
+        #TODO: this is currently close but not quite there.
+        # Slices must be fully defined in SlicedGraph.compute_slices()
+        graph, gfa = self.make_graph_from_gfa()
+        slices = SlicedGraph.from_graph(graph)
+        x = 'x'
+        y = 'y'
+        z = 'z'
+        print(slices)
+        self.assertEqual(slices, [['CAAATAAG', {x, y, z}], ['A', {y, z}, 'G', {x}], ['C', {x, y, z}], ['TTG', {x, y, z}], ['A', {z}, 'G', {x, y}], ['AAATTTTCTGGAGTTCTAT', {x, y, z}], ['T', {x, y, z}], ['ATAT', {x, y, z}], ['T', {x, y, z}], ['CCAACTCTCTG', {x, y, z}]])
+
     def make_graph_from_gfa(self):
         gfa = GFA.load_from_gfa(PATH_TO_TEST_DATA + "test.gfa")
         graph = gfa.to_graph

From 47e760404bc9723bb3d12c250bd9039e2fc9496b Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 15:58:31 +0900
Subject: [PATCH 08/14] #13: Refactoring

---
 src/gfa.py   | 50 +-------------------------------------------------
 src/graph.py |  6 +++---
 src/sort.py  | 11 +++++------
 3 files changed, 9 insertions(+), 58 deletions(-)

diff --git a/src/gfa.py b/src/gfa.py
index 3195e73..7bd7fea 100644
--- a/src/gfa.py
+++ b/src/gfa.py
@@ -147,58 +147,10 @@ def to_graph(self):
                 graph.append_node_to_path(node.name, node.orient, path.name)
         for segment in self.gfa.segments:
             graph.nodes[segment.name].seq = segment.sequence
+        graph.paths = self.to_paths
         return graph
         # IMPORTANT: It's not clear to Josiah how much of the below is necessary, so it's being left unmodified.
 
-        topological_sort_helper = TopologicalSort()
-        path_dict = defaultdict(list)
-        node_hash = {}
-
-        # Extract all paths into graph
-        for path in self.gfa.paths:
-            for node in path.segment_names:
-                path_dict[node.name + node.orient].append(path.name)
-            for node_pair in pairwise(path.segment_names):
-                topological_sort_helper.add_edge(
-                    node_pair[0].name + node_pair[0].orient,
-                    node_pair[1].name + node_pair[1].orient)
-
-        # Extract all nodes in the graph.
-        for segment in self.gfa.segments:
-            node_id = segment.name + "+"
-            node = Node(segment.sequence, path_dict[node_id])
-            node_hash[node_id] = node
-
-            node_id = segment.name + "-"
-            node = Node(segment.sequence, path_dict[node_id])
-            node_hash[node_id] = node
-
-        node_stack = topological_sort_helper.topologicalSort()
-
-        # Cluster nodes as multiple slices according to the result of the topological sort.
-        factory_input = []
-        current_slice = Slice([])
-        for node in node_stack:
-            if len(path_dict[node]) == len(self.gfa.paths):
-                if len(current_slice.nodes) > 0:
-                    factory_input.append(current_slice)
-                factory_input.append(Slice([node_hash[node]]))
-                current_slice = Slice([])
-            else:
-                all_set = set()
-                for items in [x.paths for x in current_slice.nodes]:
-                    all_set = all_set | items
-                if set(path_dict[node]) & all_set != set():
-                    if len(current_slice.nodes) > 0:
-                        current_slice.add_node(Node("", set([x.name for x in self.gfa.paths]) - all_set))
-                        factory_input.append(current_slice)
-                    current_slice = Slice([node_hash[node]])
-                else:
-                    current_slice.add_node(node_hash[node])
-
-        base_graph = SlicedGraph.load_from_slices(factory_input, self.gfa.paths)
-        return base_graph
-
 
 '''
 class XGWrapper:
diff --git a/src/graph.py b/src/graph.py
index fa526b7..7657c29 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -275,7 +275,7 @@ def from_graph(graph):
         g = SlicedGraph([])
         g.paths = graph.paths  # shallow copy all relevant fields
         g.nodes = graph.nodes
-        g.compute_slices()
+        g.compute_slices_by_dagify()
         return g
 
     def compute_slices(self):
@@ -294,8 +294,8 @@ def compute_slices_by_dagify(self):
             return self
         dagify = DAGify(self.paths)
         profile = dagify.recursive_merge(0)
-        graph = dagify.to_graph(profile)
-        self.slices = graph.slices
+        slices = dagify.to_slices(profile)
+        self.slices = slices
         return self
 
     @staticmethod
diff --git a/src/sort.py b/src/sort.py
index ad41c60..fdd3a09 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -104,7 +104,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
 
         return index
 
-    def to_graph(self, profile: List[Profile]):
+    def to_slices(self, profile: List[Profile]):
         factory_input = []
         current_slice = Slice([])
         current_paths = []
@@ -118,11 +118,6 @@ def to_graph(self, profile: List[Profile]):
                 current_paths = []
             else:
                 all_path_set = set([x for x in current_paths])
-                all_set = set()
-                for items in [x.paths for x in current_slice]:
-                     items = set(items) #print(type(list(items)[0]))
-                     all_set |= items
-                # print(all_set, prof.candidate_paths, prof.paths, set([x.name for x in prof.paths]) & all_set)
                 if set([x for x in prof.paths]) & all_path_set != set():
                     if len(current_slice.nodes) > 0:
                         if prof.candidate_paths - all_path_set != set():
@@ -133,7 +128,11 @@ def to_graph(self, profile: List[Profile]):
                 else:
                     current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.id))
                     current_paths.extend(paths)
+        return factory_input
+
 
+    def to_graph(self, profile: List[Profile]):
+        factory_input = self.to_slices(profile)
         base_graph = SlicedGraph.load_from_slices(factory_input, self.paths)
         # print(factory_input)
         return base_graph

From 33370e94cddfffb573502b048184b1bb582a113e Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 16:16:52 +0900
Subject: [PATCH 09/14] #13: Hotfix

---
 src/gfa.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gfa.py b/src/gfa.py
index 7bd7fea..738d7fb 100644
--- a/src/gfa.py
+++ b/src/gfa.py
@@ -108,7 +108,7 @@ def save_as_gfa(self, file: str):
     def from_graph(cls, graph: Graph):
         """Constructs the lines of a GFA file listing paths, then sequence nodes in arbitrary order."""
         gfa = gfapy.Gfa()
-        for path in graph.paths.values():
+        for path in graph.paths:
             node_series = ",".join([traverse.node.id + traverse.strand for traverse in path.nodes])
             gfa.add_line('\t'.join(['P', path.accession, node_series, ",".join(['*' for _ in path.nodes])]))
         for node in graph.nodes.values(): # in no particular order

From c14eb81aacafd445e30edce2e378c954a34f1be7 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 17:28:10 +0900
Subject: [PATCH 10/14] #13: Update tests

---
 src/sort.py                | 17 +++++++++--------
 src/test.py                | 38 ++++++++++++++++++++++++++++++++++++--
 test/inversion.gfa         |  3 ++-
 test/unresolved_repeat.gfa |  2 +-
 4 files changed, 48 insertions(+), 12 deletions(-)

diff --git a/src/sort.py b/src/sort.py
index fdd3a09..d480447 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -10,7 +10,7 @@ class Profile:
     duplicate: bool = False
 
     def __repr__(self):
-        return "["+str(self.node.node) + str(self.paths)+"]"
+        return "["+str(self.node.node) + str(self.paths)+":"+str(self.candidate_paths) +"]"
 
 class DAGify:
     def __init__(self, paths: List[Path], nodes={}):
@@ -55,7 +55,6 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
         index = []
         prev = set()
         candidate_path_flag = False
-#        print(s1., s2.nodes)
 
         while i > 0 and j > 0:
             if s1[i-1].node == s2.nodes[j-1]:
@@ -79,7 +78,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
                 i -= 1
             else:
                 candidate_paths = {s2}
-                if s1[i]:
+                if i > n and s1[i]:
                     candidate_paths |= s1[i].candidate_paths
                 if s1[i-1]:
                     candidate_paths |= s1[i-1].candidate_paths
@@ -95,29 +94,33 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
             i -= 1
 
         while j > 0:
-            print(s2.nodes[j - 1], type(s2.nodes[j - 1]))
+#            print(s2.nodes[j - 1], type(s2.nodes[j - 1]))
             prev.add(s2.nodes[j - 1].node.id)
             index.append(Profile(s2.nodes[j - 1], [s2], {s2}, False))
             j -= 1
 
         index.reverse()
+        # print(index)
 
         return index
 
-    def to_slices(self, profile: List[Profile]):
+    def to_slices(self, profile: List[Profile]) -> List[Path]:
         factory_input = []
         current_slice = Slice([])
         current_paths = []
         for prof in profile:
             paths = [x for x in prof.paths]
+            all_path_set = set([x for x in current_paths])
+            # print(prof, current_slice, current_paths)
             if len(prof.paths) == len(prof.candidate_paths):
                 if len(current_slice.nodes) > 0:
+                    if prof.candidate_paths - all_path_set != set():
+                        current_slice.add_node(Node("", prof.candidate_paths - all_path_set))
                     factory_input.append(current_slice)
                 factory_input.append(Slice([Node(prof.node.node.seq, paths, prof.node.node.id)]))
                 current_slice = Slice([])
                 current_paths = []
             else:
-                all_path_set = set([x for x in current_paths])
                 if set([x for x in prof.paths]) & all_path_set != set():
                     if len(current_slice.nodes) > 0:
                         if prof.candidate_paths - all_path_set != set():
@@ -130,9 +133,7 @@ def to_slices(self, profile: List[Profile]):
                     current_paths.extend(paths)
         return factory_input
 
-
     def to_graph(self, profile: List[Profile]):
         factory_input = self.to_slices(profile)
         base_graph = SlicedGraph.load_from_slices(factory_input, self.paths)
-        # print(factory_input)
         return base_graph
diff --git a/src/test.py b/src/test.py
index 9dee219..db60bb3 100644
--- a/src/test.py
+++ b/src/test.py
@@ -149,7 +149,7 @@ def test_dagify_altpath(self):
         profile, rep_count = dagify.search_for_minimizing_replications()
         graph = dagify.to_graph(profile)
         self.assertEqual(rep_count, 1)
-        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['A', {x}], ['G', {x, y}], ['A', {y}], ['T', {x, y}]])
+        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['A', {x}, '', {y}], ['G', {x, y}], ['A', {y}, '', {x}], ['T', {x, y}]])
 
     def test_dagify_dup(self):
         gfa = GFA.load_from_gfa("../test/duplicate.gfa")
@@ -158,9 +158,43 @@ def test_dagify_dup(self):
         profile, rep_count = dagify.search_for_minimizing_replications()
         graph = dagify.to_graph(profile)
         self.assertEqual(rep_count, 2)
-        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['', {x}, 'A', {y}], ['G', {y}], ['A', {x, y}], ['G', {x, y}], ['T', {x, y}]])
+        self.assertEqual(graph, [['CAAATAAG', {x, y}], ['', {x}, 'A', {y}], ['', {x}, 'G', {y}], ['A', {x, y}], ['G', {x, y}], ['T', {x, y}]])
 
 
+    def test_unresolved_repreat(self):
+        gfa = GFA.load_from_gfa("../test/unresolved_repeat.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual([['CAAATAAG', {'x'}, 'T', {'y'}], ['A', {'y', 'x'}], ['G', {'x'}, 'C', {'y'}]], graph)
+
+    @unittest.skip("Inversion is unsupported")
+    def test_inversion(self):
+        gfa = GFA.load_from_gfa("../test/inversion.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual(graph, [])
+
+    @unittest.skip("Inversion is unsupported")
+    def test_nested_inversion(self):
+        gfa = GFA.load_from_gfa("../test/nested_inv.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+        self.assertEqual(graph, [])
+
+    def test_simple_inversion(self):
+        gfa = GFA.load_from_gfa("../test/simple_inv.gfa")
+        paths = gfa.to_paths
+        dagify = DAGify(paths)
+        profile, rep_count = dagify.search_for_minimizing_replications()
+        graph = dagify.to_graph(profile)
+#        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x,y}], ['G', {x, y}]])
+        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x}, 'A', {y}], ['G', {x, y}]])
 
 class GFATest(unittest.TestCase):
     """ test class of gfa.py
diff --git a/test/inversion.gfa b/test/inversion.gfa
index d5a9e92..4572a75 100644
--- a/test/inversion.gfa
+++ b/test/inversion.gfa
@@ -7,4 +7,5 @@ L	1	+	3	-	0M
 S	2	A
 L	2	+	3	+	0M
 L	3	+	4	+	0M
-S	3	G
\ No newline at end of file
+S	3	G
+S	4	T
\ No newline at end of file
diff --git a/test/unresolved_repeat.gfa b/test/unresolved_repeat.gfa
index 1e18616..6cd4e7c 100644
--- a/test/unresolved_repeat.gfa
+++ b/test/unresolved_repeat.gfa
@@ -1,6 +1,6 @@
 H	VN:Z:1.0
 P	x	1+,2+,3+	*,*
-P	y	4+,2+,5+	*,*,*,*,*,*,*,*,*
+P	y	4+,2+,5+	*,*
 S	1	CAAATAAG
 L	1	+	2	+	0M
 L	4	+	2	+	0M

From f3e8cada93e9ebed94bc6b2a658ff7767152f34f Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 17:56:41 +0900
Subject: [PATCH 11/14] #13: Append trailing slice in DAGify.to_slices

---
 src/sort.py | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/sort.py b/src/sort.py
index d480447..6ac7480 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -100,7 +100,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
             j -= 1
 
         index.reverse()
-        # print(index)
+        print(index)
 
         return index
 
@@ -108,11 +108,16 @@ def to_slices(self, profile: List[Profile]) -> List[Path]:
         factory_input = []
         current_slice = Slice([])
         current_paths = []
-        for prof in profile:
+        # print(profile)
+        for index, prof in enumerate(profile):
             paths = [x for x in prof.paths]
             all_path_set = set([x for x in current_paths])
             # print(prof, current_slice, current_paths)
-            if len(prof.paths) == len(prof.candidate_paths):
+            candidate_paths_set = prof.candidate_paths
+            if index + 1 != len(profile):
+                candidate_paths_set |= profile[index+1].candidate_paths
+
+            if len(prof.paths) == len(candidate_paths_set):
                 if len(current_slice.nodes) > 0:
                     if prof.candidate_paths - all_path_set != set():
                         current_slice.add_node(Node("", prof.candidate_paths - all_path_set))
@@ -131,6 +136,12 @@ def to_slices(self, profile: List[Profile]) -> List[Path]:
                 else:
                     current_slice.add_node(Node(prof.node.node.seq, paths, prof.node.node.id))
                     current_paths.extend(paths)
+
+        if len(current_slice.nodes) > 0:
+            all_path_set = set([x for x in current_paths])
+            if profile[-1].candidate_paths - all_path_set != set():
+                current_slice.add_node(Node("", prof.candidate_paths - all_path_set))
+            factory_input.append(current_slice)
         return factory_input
 
     def to_graph(self, profile: List[Profile]):

From 8a549285106aa95a7524d56451400e85a173beaa Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 20:36:06 +0900
Subject: [PATCH 12/14] #13: Fix tests

---
 src/sort.py |  4 +---
 src/test.py | 10 +++++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/src/sort.py b/src/sort.py
index 6ac7480..2636e2a 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -94,13 +94,11 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
             i -= 1
 
         while j > 0:
-#            print(s2.nodes[j - 1], type(s2.nodes[j - 1]))
             prev.add(s2.nodes[j - 1].node.id)
             index.append(Profile(s2.nodes[j - 1], [s2], {s2}, False))
             j -= 1
 
         index.reverse()
-        print(index)
 
         return index
 
@@ -108,7 +106,7 @@ def to_slices(self, profile: List[Profile]) -> List[Path]:
         factory_input = []
         current_slice = Slice([])
         current_paths = []
-        # print(profile)
+
         for index, prof in enumerate(profile):
             paths = [x for x in prof.paths]
             all_path_set = set([x for x in current_paths])
diff --git a/src/test.py b/src/test.py
index db60bb3..6476193 100644
--- a/src/test.py
+++ b/src/test.py
@@ -187,6 +187,7 @@ def test_nested_inversion(self):
         graph = dagify.to_graph(profile)
         self.assertEqual(graph, [])
 
+    @unittest.skip("Inversion is unsupported")
     def test_simple_inversion(self):
         gfa = GFA.load_from_gfa("../test/simple_inv.gfa")
         paths = gfa.to_paths
@@ -196,14 +197,17 @@ def test_simple_inversion(self):
 #        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x,y}], ['G', {x, y}]])
         self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x}, 'A', {y}], ['G', {x, y}]])
 
+
+location_of_xg = "../test/xg"
+
+
 class GFATest(unittest.TestCase):
     """ test class of gfa.py
     """
 
-    @unittest.expectedFailure
+    @unittest.skipIf(not os.path.isfile(location_of_xg), "XG binary is not found.")
     def test_gfa(self):
         self.maxDiff = None
-        location_of_xg = "../test/xg"
         graph = GFA.load_from_gfa("../test/test.gfa")
         graph.save_as_xg("../test/test.xg", location_of_xg)
         graph2 = GFA.load_from_xg("../test/test.xg", location_of_xg)
@@ -254,11 +258,11 @@ def test_load_gfa_to_graph_2(self):
 
     @unittest.expectedFailure
     def test_load_gfa_via_xg(self):
-        location_of_xg = "../test/xg"
         graph = GFA.load_from_gfa("../test/test.gfa")
         graph.save_as_xg("../test/test.xg", location_of_xg)
         graph2 = GFA.load_from_xg("../test/test.xg", location_of_xg)
         graph = graph2.to_graph
+        graph = SlicedGraph.from_graph(graph)
         x = 'x'
         y = 'y'
         z = 'z'

From aee4823434275af8b17970419f336604efb308e2 Mon Sep 17 00:00:00 2001
From: Toshiyuki Yokoyama <yokoyama@no.reply>
Date: Thu, 18 Jul 2019 20:53:22 +0900
Subject: [PATCH 13/14] #13: Fix tests

---
 src/graph.py        | 9 +++++++--
 src/test.py         | 3 +--
 test/simple_inv.gfa | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 7657c29..31e22e9 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -187,10 +187,15 @@ def __init__(self, node: Node, strand: str = '+'):
         self.strand = strand  # TODO: make this required
 
     def __repr__(self):
-        return self.node.seq
+        if self.strand == '+':
+            return self.node.seq
+        else:
+            complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}
+            return "".join(complement.get(base, base) for base in reversed(self.node.seq))
+
 
     def __eq__(self, other):
-        return self.node.id == other.node.id
+        return self.node.id == other.node.id and self.strand == other.strand
 
 
 class Graph:
diff --git a/src/test.py b/src/test.py
index 6476193..77e958a 100644
--- a/src/test.py
+++ b/src/test.py
@@ -194,8 +194,7 @@ def test_simple_inversion(self):
         dagify = DAGify(paths)
         profile, rep_count = dagify.search_for_minimizing_replications()
         graph = dagify.to_graph(profile)
-#        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x,y}], ['G', {x, y}]])
-        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['A', {x}, 'A', {y}], ['G', {x, y}]])
+        self.assertEqual(graph, [['CAAATAAG', {x,y}], ['AC', {x}, 'AC', {y}], ['G', {x, y}]])
 
 
 location_of_xg = "../test/xg"
diff --git a/test/simple_inv.gfa b/test/simple_inv.gfa
index de62c6b..9d8d152 100644
--- a/test/simple_inv.gfa
+++ b/test/simple_inv.gfa
@@ -4,7 +4,7 @@ P	y	1+,2-,3+	*,*
 S	1	CAAATAAG
 L	1	+	2	+	0M
 L	1	+	2	-	0M
-S	2	A
+S	2	AC
 L	2	-	3	+	0M
 L	2	+	3	+	0M
 S	3	G
\ No newline at end of file

From 0dc052855d6d1d34262bab3b2721f54e196fdff5 Mon Sep 17 00:00:00 2001
From: Josiah Seaman <josiah@newline.us>
Date: Fri, 19 Jul 2019 10:35:59 +0100
Subject: [PATCH 14/14] Cleaned up sort imports for code review

---
 src/graph.py |  5 ++---
 src/sort.py  | 19 ++++++++++++-------
 src/test.py  |  2 --
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 31e22e9..ffda621 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -251,9 +251,6 @@ def compute_slices(self):
         return SlicedGraph.from_graph(self)
 
 
-from sort import DAGify
-
-
 class SlicedGraph(Graph):
     def __init__(self, paths):
         super(SlicedGraph, self).__init__(paths)
@@ -295,6 +292,8 @@ def compute_slices(self):
 
     def compute_slices_by_dagify(self):
         """This method uses DAGify algorithm to compute slices."""
+        from src.sort import DAGify  # deferred import: src.sort imports src.graph (else circular)
+
         if not self.paths:
             return self
         dagify = DAGify(self.paths)
diff --git a/src/sort.py b/src/sort.py
index 2636e2a..946c42e 100644
--- a/src/sort.py
+++ b/src/sort.py
@@ -1,6 +1,9 @@
-from src.graph import *
-
+import sys
 import dataclasses
+from typing import List
+
+from src.graph import NodeTraversal, Path, Slice, Node, SlicedGraph
+
 
 @dataclasses.dataclass
 class Profile:
@@ -13,11 +16,12 @@ def __repr__(self):
         return "["+str(self.node.node) + str(self.paths)+":"+str(self.candidate_paths) +"]"
 
 class DAGify:
-    def __init__(self, paths: List[Path], nodes={}):
+    def __init__(self, paths: List[Path], nodes=None):
         """
-
         :type paths: List[Path]
         """
+        if nodes is None:
+            nodes = {}
         self.paths = paths
         self.nodes = nodes
 
@@ -102,7 +106,7 @@ def lcs(self, s1: List[Profile], s2: Path) -> List[Profile]:
 
         return index
 
-    def to_slices(self, profile: List[Profile]) -> List[Path]:
+    def to_slices(self, profile: List[Profile]) -> List[Slice]:
         factory_input = []
         current_slice = Slice([])
         current_paths = []
@@ -138,11 +142,12 @@ def to_slices(self, profile: List[Profile]) -> List[Path]:
         if len(current_slice.nodes) > 0:
             all_path_set = set([x for x in current_paths])
             if profile[-1].candidate_paths - all_path_set != set():
+                print(prof)
                 current_slice.add_node(Node("", prof.candidate_paths - all_path_set))
             factory_input.append(current_slice)
         return factory_input
 
-    def to_graph(self, profile: List[Profile]):
-        factory_input = self.to_slices(profile)
+    def to_graph(self, profiles: List[Profile]):
+        factory_input = self.to_slices(profiles)
         base_graph = SlicedGraph.load_from_slices(factory_input, self.paths)
         return base_graph
diff --git a/src/test.py b/src/test.py
index 77e958a..ec30b6c 100644
--- a/src/test.py
+++ b/src/test.py
@@ -219,8 +219,6 @@ def test_load_gfa_to_graph(self):
         self.assertEqual(len(graph.nodes), 15)
 
     def test_gfa_to_sliced_graph(self):
-        #TODO: this is currently close but not quite there.
-        # Slices must be fully defined in SlicedGraph.compute_slices()
         graph, gfa = self.make_graph_from_gfa()
         slices = SlicedGraph.from_graph(graph)
         x = 'x'