Skip to content
This repository has been archived by the owner on Mar 20, 2020. It is now read-only.

Commit

Permalink
#3 Graph object equality checks use a call hierarchy. Sorted frustrat…
Browse files Browse the repository at this point in the history
…ions with unordered set equality in string representation.
  • Loading branch information
josiahseaman committed Jun 20, 2019
1 parent e8c11ac commit fbd8a5b
Showing 1 changed file with 71 additions and 89 deletions.
160 changes: 71 additions & 89 deletions Graph_Summarization_Prototype.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,38 +45,45 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"c [['ACGT', {1, 2, 3, 4}], ['C', {1, 2, 4}, 'T', {3}], ['GGA', {1, 2, 3, 4}], ['C', {1, 2, 4}, '', {3}], ['AGTACG', {1, 2, 3}, 'CGTACT', {4}], ['TTG', {1, 2, 3, 4}]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n"
]
}
],
"outputs": [],
"source": [
"import unittest \n",
"test = unittest.TestCase() # just using it for assertRaises\n",
"from typing import Callable, Iterator, Union, Optional, List, Iterable\n",
"from collections import namedtuple\n",
"# %debug\n",
"\n",
"from itertools import zip_longest\n",
"# %debug"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"#Node = namedtuple('Node', ['seq', 'paths'])\n",
"class Node:\n",
" def __init__(self, seq: str, paths: List[int]):\n",
" def __init__(self, seq: str, paths: Iterable[int]):\n",
" assert isinstance(seq, str), seq\n",
" assert not isinstance(paths, str) and isinstance(paths, Iterable), paths\n",
" self.seq = seq\n",
" self.paths = paths\n",
" self.paths = set(paths)\n",
" def __len__(self):\n",
" return len(self.paths)\n",
" def __repr__(self):\n",
" return repr(self.seq) + ', ' + repr(sorted(list(self.paths)))\n",
" \"\"\"Paths representation is sorted because set ordering is not guaranteed.\"\"\"\n",
" return repr(self.seq) + \\\n",
" ', {' + ', '.join(str(i) for i in sorted(list(self.paths))) + '}' \n",
" def __eq__(self, other):\n",
" if not isinstance(other, Node):\n",
" print(\"Warn: comparing Node and \", type(other), other)\n",
" return False\n",
" return self.seq == other.seq and self.paths == other.paths\n",
" def __hash__(self):\n",
" return hash(self.seq)\n",
" \n",
"\n",
"def merge(self, smaller: Node) -> Node:\n",
" m = Node(self.seq, self.paths.union(smaller.paths))\n",
Expand All @@ -86,16 +93,26 @@
"\n",
" \n",
"class Slice:\n",
" def __init__(self, nodes: List[Node]):\n",
" self.nodes = nodes #[nodes] if isinstance(nodes, Node) else nodes\n",
" def __init__(self, nodes: Iterable[Node]):\n",
" self.nodes = set(nodes) \n",
" def alternatives(self, main):\n",
" return self.nodes.difference({main})\n",
" def bystanders(self, first,second):\n",
" return self.nodes.difference({first,second})\n",
" def __len__(self):\n",
" return len(self.nodes)\n",
" def __repr__(self):\n",
" return self.nodes.__repr__()# '['+ ','.join(self.paths)+']'\n",
" #return '{' + ', '.join(str(i) for i in sorted(list(self.nodes))) + '}' \n",
" return list(self.nodes).__repr__()# '['+ ','.join(self.paths)+']'\n",
" def __eq__(self, other):\n",
" if isinstance(other, Slice):\n",
" #all(a==b for a,b in zip_longest(self.nodes,other.nodes)) # order dependent\n",
" if not self.nodes == other.nodes:\n",
" print(self.nodes, other.nodes, sep='\\n')\n",
" return self.nodes == other.nodes\n",
" else:\n",
" print(\"Warn: comparing Slice and \", type(other), other)\n",
" return False\n",
" \n",
" def primary(self):\n",
" return max(self.nodes, key=len) # When they're the same size, take the other\n",
Expand All @@ -109,20 +126,20 @@
" def __init__(self, cmd: List):\n",
" \"\"\"Factory for generating graphs from a representation\"\"\"\n",
" self.slices = []\n",
" if cmd[0] and isinstance(cmd[0][0], Node):\n",
" if cmd[0] and isinstance(next(iter(cmd[0])), Node):\n",
" self.slices = cmd # doesn't need to be parsed\n",
" else:\n",
" if isinstance(cmd, str):\n",
" cmd = eval(cmd)\n",
" print('c', cmd)\n",
" #print('c', cmd)\n",
" for sl in cmd:\n",
" current_slice = []\n",
" try:\n",
" for i in range(0, len(sl), 2):\n",
" current_slice.append(Node(sl[i], sl[i+1]))\n",
" except IndexError:\n",
" print(\"Expecting two terms: \", sl[i:i+2])\n",
" self.slices.append(current_slice)\n",
" self.slices.append(Slice(current_slice))\n",
" \n",
" def __repr__(self):\n",
" \"\"\"Warning: the representation strings are very sensitive to whitespace\"\"\"\n",
Expand All @@ -131,28 +148,36 @@
" return self.slices[i]\n",
" def __eq__(self, representation):\n",
" if isinstance(representation, Graph):\n",
" return str(self) == str(representation)\n",
" return str(self.slices) == str(Graph(representation).slices)\n",
" return all(slice_a == slice_b for slice_a, slice_b in zip_longest(self.slices, representation.slices))\n",
" return self == Graph(representation) # build a graph then compare it\n",
" \n",
" \n",
" \n",
"#base_graph = [ [{1,2,3,4}], [{1,2,4},{3}], [{1,2,3,4}], [{1,2,4},{3}], [{1,2,3},{4}], [{1,2,3,4}] ]\n",
"factory_input = [ ['ACGT',{1,2,3,4}], ['C',{1,2,4},'T',{3}], ['GGA',{1,2,3,4}], \n",
"factory_input = [['ACGT',{1,2,3,4}], ['C',{1,2,4},'T',{3}], ['GGA',{1,2,3,4}], \n",
" ['C',{1,2,4},'',{3}], ['AGTACG',{1,2,3},'CGTACT',{4}], ['TTG',{1,2,3,4}] ]\n",
"base_graph = [Slice(Node('ACGT', {1,2,3,4})), \n",
"base_graph = [Slice([Node('ACGT', {1,2,3,4})]), \n",
" Slice([Node('C',{1,2,4}),Node('T', {3})]), \n",
" Slice([Node('GGA',{1,2,3,4})]), \n",
" Slice([Node('C',{1,2,4}),Node('', {3})]),\n",
" Slice([Node('AGTACG',{1,2,3}), Node('CGTACT',{4})]),\n",
" Slice(Node('TTG',{1,2,3,4})) ]\n",
" Slice([Node('TTG',{1,2,3,4})]) ]\n",
"\n",
"g = Graph(factory_input)\n",
"repr(g) == str([['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]])\n",
"g\n",
"assert g == str(factory_input), ('\\n' + repr(g) + '\\n' + str(factory_input))\n",
"g_double = Graph(eval(str(g)))\n",
"str(g_double) == str(g)\n",
"str(g_double) == str(g) # WARN: could be order sensitive, don't worry if it fails\n",
"assert g_double == g\n",
"assert g_double == [['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]]\n",
"assert g_double == \"[['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]]\""
"assert g_double == factory_input\n",
"assert g_double == str(factory_input)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
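"**Developer's Note**: If you need a set with preserved ordering, use the keys of a dictionary instead of a set (insertion order is an implementation detail in CPython 3.6 and a language guarantee from Python 3.7 onward): `dict.fromkeys([67, 46, 55, 39, 94, 63, 34, 32, 57, 54, 67, 36, 63]).keys()`"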
]
},
{
Expand All @@ -174,20 +199,9 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[['C', [1, 2, 3, 4]]]"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def merge_vertical(one_slice: Slice) -> List[Slice]:\n",
" \"\"\"Merges the smallest node into the biggest node. Preserves bystander nodes as well.\"\"\"\n",
Expand All @@ -199,34 +213,24 @@
" return Graph([[merger]])\n",
" else: #Allows for third possibilities\n",
" return Graph([[merger, *one_slice.bystanders(smallest_node, biggest_node)]])\n",
"(merge_vertical(base_graph[1]))# == str([{1, 2, 3, 4}])\n",
"assert merge_vertical(base_graph[1]) == [['C', {1, 2, 3, 4}]]\n",
"merge_vertical(Graph([['A',{1,2,4}, 'C',{3}, 'T',{12,16}]])[0])\n",
"# assert merge_vertical([{1,2,4}, {3}, {12,16}]) == [[{1, 2, 3, 4}, {12, 16}]]\n",
"# assert merge_vertical([{1, 2}, {3, 4}]) == [[{1, 2, 3, 4}]]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['C', [1, 2, 4], 'T', [3]], __main__.Slice)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"base_graph[1], type(base_graph[1])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -273,7 +277,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -333,7 +337,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -373,20 +377,9 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[{1, 2}, {3}]]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def propagate_split(first_slice, second_slice):\n",
" \"\"\"When one slice has all paths in one node, and the next slice has the paths split, there are two options.\n",
Expand Down Expand Up @@ -426,20 +419,9 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[{1, 2, 4}, {3}]]"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"def use_merging(graph, merge_fn, start_index):\n",
" \"\"\"Function that executes a given merge in the context of the whole graph then \n",
Expand Down

0 comments on commit fbd8a5b

Please sign in to comment.