Skip to content
This repository has been archived by the owner on Mar 20, 2020. It is now read-only.

Commit

Permalink
#3 New object notation for Graphs.
Browse files Browse the repository at this point in the history
  • Loading branch information
josiahseaman committed Jun 19, 2019
1 parent 092ffc8 commit e8c11ac
Showing 1 changed file with 91 additions and 28 deletions.
119 changes: 91 additions & 28 deletions Graph_Summarization_Prototype.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,44 +45,49 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"6"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"c [['ACGT', {1, 2, 3, 4}], ['C', {1, 2, 4}, 'T', {3}], ['GGA', {1, 2, 3, 4}], ['C', {1, 2, 4}, '', {3}], ['AGTACG', {1, 2, 3}, 'CGTACT', {4}], ['TTG', {1, 2, 3, 4}]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n",
"c [['ACGT', [1, 2, 3, 4]], ['C', [1, 2, 4], 'T', [3]], ['GGA', [1, 2, 3, 4]], ['C', [1, 2, 4], '', [3]], ['AGTACG', [1, 2, 3], 'CGTACT', [4]], ['TTG', [1, 2, 3, 4]]]\n"
]
}
],
"source": [
"import unittest \n",
"test = unittest.TestCase() # just using it for assertRaises\n",
"from typing import Callable, Iterator, Union, Optional, List\n",
"from typing import Callable, Iterator, Union, Optional, List, Iterable\n",
"from collections import namedtuple\n",
"# %debug\n",
"\n",
"#Node = namedtuple('Node', ['seq', 'paths'])\n",
"class Node:\n",
" def __init__(self, seq, paths):\n",
" def __init__(self, seq: str, paths: List[int]):\n",
" assert isinstance(seq, str), seq\n",
" assert not isinstance(paths, str) and isinstance(paths, Iterable), paths\n",
" self.seq = seq\n",
" self.paths = paths\n",
" def __len__(self):\n",
" return len(self.paths)\n",
" def __repr__(self):\n",
" return self.paths.__repr__()\n",
" \n",
" def merge(self, smaller: Node) -> Node:\n",
" m = Node(self.seq, self.paths.union(smaller.paths))\n",
" # TODO: penalize paths with nucleotide mismatch\n",
" return m\n",
" return repr(self.seq) + ', ' + repr(sorted(list(self.paths)))\n",
"\n",
"def merge(self, smaller: Node) -> Node:\n",
"    \"\"\"Return a new Node carrying self.seq and the union of both nodes' paths.\"\"\"\n",
"    # paths may be any non-string iterable (e.g. a list once a Graph has been\n",
"    # rebuilt from its repr), and lists have no union(), so build a set first.\n",
"    m = Node(self.seq, set(self.paths).union(smaller.paths))\n",
"    # TODO: penalize paths with nucleotide mismatch\n",
"    return m\n",
"Node.merge = merge\n",
"\n",
" \n",
"class Slice:\n",
" def __init__(self, nodes):\n",
" self.nodes = [nodes] if isinstance(nodes, Node) else nodes\n",
" def __init__(self, nodes: List[Node]):\n",
" self.nodes = nodes #[nodes] if isinstance(nodes, Node) else nodes\n",
" def alternatives(self, main):\n",
" return self.nodes.difference({main})\n",
" def bystanders(self, first,second):\n",
Expand All @@ -100,16 +105,54 @@
" def smallest(self):\n",
" return min(reversed(self.nodes), key=len) # when they're the same size it will take the last listed\n",
"\n",
"class Graph:\n",
"    \"\"\"Ordered list of slices, where each parsed slice is a list of Node objects.\"\"\"\n",
"\n",
"    def __init__(self, cmd: List):\n",
"        \"\"\"Factory for generating graphs from a representation.\n",
"\n",
"        cmd is one of:\n",
"          * a list whose first slice starts with a Node -- stored as-is;\n",
"          * a nested list [[seq, paths, seq, paths, ...], ...];\n",
"          * a string that evaluates to such a nested list.\n",
"        An empty list or a blank string produces a graph with no slices\n",
"        instead of raising IndexError / SyntaxError.\n",
"        \"\"\"\n",
"        self.slices = []\n",
"        if isinstance(cmd, str):\n",
"            # SECURITY: eval() runs arbitrary code -- only pass trusted strings.\n",
"            cmd = eval(cmd) if cmd.strip() else []\n",
"        # len() rather than truthiness, so a None argument still raises TypeError\n",
"        if len(cmd) == 0:\n",
"            return\n",
"        if cmd[0] and isinstance(cmd[0][0], Node):\n",
"            self.slices = cmd  # doesn't need to be parsed\n",
"            return\n",
"        print('c', cmd)\n",
"        for sl in cmd:\n",
"            current_slice = []\n",
"            try:\n",
"                # terms come in (seq, paths) pairs\n",
"                for i in range(0, len(sl), 2):\n",
"                    current_slice.append(Node(sl[i], sl[i+1]))\n",
"            except IndexError:\n",
"                # odd number of terms: report the dangling one, keep the pairs parsed so far\n",
"                print(\"Expecting two terms: \", sl[i:i+2])\n",
"            self.slices.append(current_slice)\n",
"\n",
"    def __repr__(self):\n",
"        \"\"\"Warning: the representation strings are very sensitive to whitespace\"\"\"\n",
"        return repr(self.slices)\n",
"\n",
"    def __getitem__(self, i):\n",
"        \"\"\"Return slice number i.\"\"\"\n",
"        return self.slices[i]\n",
"\n",
"    def __eq__(self, representation):\n",
"        \"\"\"Compare by string form against another Graph or anything Graph() can parse.\"\"\"\n",
"        if isinstance(representation, Graph):\n",
"            return str(self) == str(representation)\n",
"        return str(self.slices) == str(Graph(representation).slices)\n",
" \n",
" \n",
" \n",
"#base_graph = [ [{1,2,3,4}], [{1,2,4},{3}], [{1,2,3,4}], [{1,2,4},{3}], [{1,2,3},{4}], [{1,2,3,4}] ]\n",
"factory_input = [ ['ACGT',{1,2,3,4}], ['C',{1,2,4},'T',{3}], ['GGA',{1,2,3,4}], \n",
" ['C',{1,2,4},'',{3}], ['AGTACG',{1,2,3},'CGTACT',{4}], ['TTG',{1,2,3,4}] ]\n",
"# Pass every Slice a list of Nodes, including the single-node slices.\n",
"base_graph = [Slice([Node('ACGT', {1,2,3,4})]), \n",
" Slice([Node('C',{1,2,4}),Node('T', {3})]), \n",
" Slice([Node('GGA',{1,2,3,4})]), \n",
" Slice([Node('C',{1,2,4}),Node('', {3})]),\n",
" Slice([Node('AGTACG',{1,2,3}), Node('CGTACT',{4})]),\n",
" Slice([Node('TTG',{1,2,3,4})]) ]\n",
"\n",
"\n",
"len(base_graph)"
"g = Graph(factory_input)\n",
"# The repr of a Graph must equal the str() of the equivalent nested-list notation.\n",
"assert repr(g) == str([['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]])\n",
"# Round trip: a graph rebuilt from its own repr must compare equal to the original.\n",
"g_double = Graph(eval(str(g)))\n",
"assert str(g_double) == str(g)\n",
"assert g_double == g\n",
"assert g_double == [['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]]\n",
"assert g_double == \"[['ACGT',[1,2,3,4]],['C',[1,2,4],'T',[3]],['GGA',[1,2,3,4]],['C',[1,2,4],'',[3]],['AGTACG',[1,2,3],'CGTACT',[4]],['TTG',[1,2,3,4]]]\""
]
},
{
Expand All @@ -131,16 +174,16 @@
},
{
"cell_type": "code",
"execution_count": 60,
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
"[['C', [1, 2, 3, 4]]]"
]
},
"execution_count": 60,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -153,14 +196,34 @@
" biggest_node = one_slice.primary()\n",
" merger = biggest_node.merge(smallest_node)\n",
" if len(one_slice) == 2: \n",
" return Slice(merger)\n",
" else:#Allows for third possibilities\n",
" return Slice([merger, *one_slice.bystanders(smallest_node, biggest_node)])\n",
"merge_vertical(base_graph[1]) == str([{1, 2, 3, 4}])\n",
" return Graph([[merger]])\n",
" else: #Allows for third possibilities\n",
" return Graph([[merger, *one_slice.bystanders(smallest_node, biggest_node)]])\n",
"(merge_vertical(base_graph[1]))# == str([{1, 2, 3, 4}])\n",
"# assert merge_vertical([{1,2,4}, {3}, {12,16}]) == [[{1, 2, 3, 4}, {12, 16}]]\n",
"# assert merge_vertical([{1, 2}, {3, 4}]) == [[{1, 2, 3, 4}]]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['C', [1, 2, 4], 'T', [3]], __main__.Slice)"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"base_graph[1], type(base_graph[1])"
]
},
{
"cell_type": "code",
"execution_count": 8,
Expand Down

0 comments on commit e8c11ac

Please sign in to comment.