Skip to content
This repository has been archived by the owner on Mar 20, 2020. It is now read-only.

Commit

Permalink
#23 WIP: Transitioning all code over to using Django Database and que…
Browse files Browse the repository at this point in the history
…ries in persistent memory. Tests still broken.
  • Loading branch information
josiahseaman committed Aug 15, 2019
1 parent d6db4b7 commit 3d558b6
Show file tree
Hide file tree
Showing 6 changed files with 149 additions and 102 deletions.
61 changes: 30 additions & 31 deletions Graph/gfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,9 @@ def topologicalSort(self):


class GFA:
def __init__(self, gfa: gfapy.Gfa, source_path: str):
    """Wrap a parsed gfapy graph and remember where it came from.

    gfa: the parsed gfapy.Gfa object this wrapper operates on.
    source_path: the file path (or other label) the graph was loaded from.
        It is stored as-is; to_paths uses it as the GraphGenome name.
    """
    self.gfa = gfa
    self.source_path = source_path

# @classmethod
# def load_from_pickle(cls, file: str):
Expand All @@ -77,14 +78,14 @@ def load_from_xg(cls, file: str, xg_bin: str):
process.wait()
if process.returncode != 0:
raise OSError()
graph = cls(gfa)
graph = cls(gfa, file)
process.stdout.close()
return graph

@classmethod
def load_from_gfa(cls, file: str):
    """Parse the GFA file at `file` with gfapy and wrap the result.

    The file path is passed on as the source label, so the constructor is called
    exactly once with both of the arguments it requires.
    """
    gfa = gfapy.Gfa.from_file(file)
    return cls(gfa, file)

# def save_as_pickle(self, outfile: str):
Expand All @@ -111,43 +112,41 @@ def from_graph(cls, graph: Graph):
gfa.add_line('\t'.join(['P', path.accession, node_series, ",".join(['*' for _ in path.nodes])]))
for node in graph.nodes.values(): # in no particular order
gfa.add_line('\t'.join(['S', str(node.id), node.seq]))
return cls(gfa)
return cls(gfa, "from Graph")

@property
def to_paths(self) -> List[Path]:
    """Store this GFA's segments and paths in the database and return the Path rows.

    A GraphGenome named after `self.source_path` is fetched or created as the parent,
    every segment becomes a Node under it, and every GFA path becomes a Path whose
    traversals are appended in the order gfapy lists them.

    Because this is a property it may be read more than once; all rows are obtained
    with get_or_create, and traversals are only appended to paths that were newly
    created, so a repeated read does not duplicate data.
    """
    # Create (or reuse) the parent object for this genome.
    gdb = GraphGenome.objects.get_or_create(name=self.source_path)[0]

    for segment in self.gfa.segments:
        Node.objects.get_or_create(seq=segment.sequence, name=segment.name, graph=gdb)

    paths = []
    for gfa_path in self.gfa.paths:
        # Model.save() returns None, so the instance must be bound before saving;
        # fields are passed by keyword because positional arguments start at `id`.
        stored_path, created = Path.objects.get_or_create(accession=gfa_path.name, graph=gdb)
        if created:
            stored_path.append_gfa_nodes(gfa_path.segment_names)
        paths.append(stored_path)
    return paths

@property
def to_graph(self) -> GraphGenome:
    """Return the GraphGenome that this GFA's paths are stored under.

    Reading this property reads `to_paths`, which writes the segments and paths to
    the database. The graph is taken from the first returned Path; when the GFA
    contains no paths, None is returned instead.
    """
    # to_paths is itself a property: it must be read, not called.
    paths = self.to_paths
    if not paths:
        return None
    return paths[0].graph

    # IMPORTANT: It's not clear to Josiah how much of the below is necessary, so it's being left unmodified.
    # path_names = [p.name for p in self.gfa.paths]
    # graph = Graph(path_names)  # Paths can be empty at start
    # for path in self.gfa.paths:
    #     for path_index, node in enumerate(path.segment_names):
    #         graph.append_node_to_path(node.name, node.orient, path.name, path_index)
    # for segment in self.gfa.segments:
    #     graph.nodes[segment.name].seq = segment.sequence
    # graph.paths = self.to_paths()
    # return graph


'''
Expand Down
51 changes: 51 additions & 0 deletions Graph/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# Generated by Django 2.2.1 on 2019-08-14 14:28

from django.db import migrations, models
import django.db.models.deletion


class Migration(migrations.Migration):
    """Initial schema migration for the Graph app.

    Creates four models: GraphGenome, Node, Path and NodeTraversal. They are listed so
    that every model appears after the models its ForeignKeys reference.
    """

    # Marks this as the first migration of the app.
    initial = True

    # No other migrations need to run before this one.
    dependencies = [
    ]

    operations = [
        # GraphGenome: the parent record that Node and Path point to through `graph`.
        migrations.CreateModel(
            name='GraphGenome',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('name', models.CharField(max_length=1000)),
            ],
        ),
        # Node: a sequence (`seq`, at most 255 characters, may be blank) keyed by `name`.
        # NOTE(review): `name` is the primary key and therefore unique on its own, so the
        # ('graph', 'name') unique_together below adds no further restriction. Confirm
        # whether names were meant to be unique only within one graph.
        migrations.CreateModel(
            name='Node',
            fields=[
                ('seq', models.CharField(blank=True, max_length=255)),
                ('name', models.CharField(max_length=15, primary_key=True, serialize=False)),
                ('graph', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='Graph.GraphGenome')),
            ],
            options={
                'unique_together': {('graph', 'name')},
            },
        ),
        # Path: one row per accession; `accession` is unique across the whole table,
        # not just within a graph. Deleting the graph deletes its paths (CASCADE).
        migrations.CreateModel(
            name='Path',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('accession', models.CharField(max_length=1000, unique=True)),
                ('graph', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='Graph.GraphGenome')),
            ],
        ),
        # NodeTraversal: links a Path to a Node with a strand ('+' or '-', default '+')
        # and an integer `order`. Rows are removed when either the node or the path is
        # deleted (CASCADE on both foreign keys).
        migrations.CreateModel(
            name='NodeTraversal',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('order', models.IntegerField(help_text='Defines the order a path lists traversals')),
                ('strand', models.CharField(choices=[('+', '+'), ('-', '-')], default='+', max_length=1)),
                ('node', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='Graph.Node')),
                ('path', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='Graph.Path')),
            ],
        ),
    ]
112 changes: 54 additions & 58 deletions Graph/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,11 @@
from uuid import uuid1

from django.db import models

from Graph.utils import keydefaultdict
from Utils.models import CustomSaveManager


# GraphGenome specific error classes for more informative error catching
class NoAnchorError(ValueError):
pass
class PathOverlapError(ValueError):
Expand All @@ -23,35 +24,27 @@ class GraphGenome(models.Model):
name = models.CharField(max_length=1000)


class DoubleNode():
plus = Node
minus = Node

def visitors(self):
plus_node_set.union(minus_node_set)


class Node(models.Model):
seq = models.CharField(max_length=255, blank=True)
name = models.CharField(primary_key=True)
name = models.CharField(primary_key=True, max_length=15)
graph = models.ForeignKey(GraphGenome, on_delete=models.CASCADE)

class Meta:
unique_together = ['graph', 'name']

def __len__(self):
return len(self.paths)

def __repr__(self):
"""Paths representation is sorted because set ordering is not guaranteed."""
return repr(self.seq) + \
', {' + ', '.join(str(i) for i in list(self.paths)) + '}'

def __eq__(self, other):
if not isinstance(other, Node):
print("Warn: comparing Node and ", type(other), other)
return False
return self.seq == other.seq and self.paths == other.paths # and self.id == other.id
# def __len__(self):
# return nodetraversal_set.count()
#
# def __repr__(self):
# """Paths representation is sorted because set ordering is not guaranteed."""
# return repr(self.seq) + \
# ', {' + ', '.join(str(i) for i in list(self.paths)) + '}'
#
# def __eq__(self, other):
# if not isinstance(other, Node):
# print("Warn: comparing Node and ", type(other), other)
# return False
# return self.seq == other.seq and self.paths == other.paths # and self.id == other.id

def __hash__(self):
return hash(self.seq)
Expand All @@ -64,20 +57,20 @@ def __hash__(self):
def to_gfa(self, segment_id: int):
    """Render this node as a GFA segment line: 'S', the given id and the sequence, tab-separated."""
    columns = ('S', str(segment_id), self.seq)
    gfa_line = '\t'.join(columns)
    return gfa_line

# Typing is picky about order of declaration, but strings bypass this PEP484
def merge_minor(self, minor_allele: 'Node') -> 'Node':
m = Node(self.seq, self.paths.union(minor_allele.paths))
# TODO: penalize paths with nucleotide mismatch
return m

def intersection(self, downstream: 'Node') -> 'Node':
m = Node(self.seq + downstream.seq,
self.paths.intersection(downstream.paths))
return m

def union(self, downstream: 'Node') -> 'Node':
return Node(self.seq + downstream.seq,
self.paths.union(downstream.paths))
# # Typing is picky about order of declaration, but strings bypass this PEP484
# def merge_minor(self, minor_allele: 'Node') -> 'Node':
# m = Node(self.seq, self.paths.union(minor_allele.paths))
# # TODO: penalize paths with nucleotide mismatch
# return m
#
# def intersection(self, downstream: 'Node') -> 'Node':
# m = Node(self.seq + downstream.seq,
# self.paths.intersection(downstream.paths))
# return m
#
# def union(self, downstream: 'Node') -> 'Node':
# return Node(self.seq + downstream.seq,
# self.paths.union(downstream.paths))

class Slice:
def __init__(self, nodes: Iterable[Node]):
Expand Down Expand Up @@ -138,10 +131,7 @@ class Path(models.Model):
sequences is the accession's genome. Create Paths first from accession names, then append
them to Nodes to link together."""
accession = models.CharField(unique=True, max_length=1000) # one path per accession

# def __init__(self, accession: str, nodes = []):
# # self.nodes = nodes # List[NodeTraversal]
# self.position_checkpoints = {} # TODO: currently not used
graph = models.ForeignKey(GraphGenome, on_delete=models.CASCADE)

def __getitem__(self, path_index):
return self.nodes[path_index]
Expand All @@ -158,19 +148,24 @@ def __hash__(self):

@property
def nodes(self):
    """Return this path's NodeTraversals as a queryset sorted by their `order` field.

    filter() is used rather than get(): get() returns a single model instance (and
    raises when zero or several rows match), which can be neither ordered nor indexed.
    """
    return NodeTraversal.objects.filter(path=self).order_by('order')

def append_gfa_nodes(self, nodes):
    """Append one NodeTraversal to this path for every entry of a gfapy path.

    nodes: the oriented segment references of a gfapy path; each entry must expose
        `name` (looked up as Node.name) and `orient` (stored as the strand).

    An empty sequence is accepted and adds nothing. Each traversal receives an explicit
    `order`, continuing after the highest order already stored for this path, because
    the column is required and nothing else in this method would fill it in.
    """
    if not nodes:
        return
    assert hasattr(nodes[0], 'orient') and hasattr(nodes[0], 'name'), 'Expecting gfapy.Gfa.path'

    last = NodeTraversal.objects.filter(path=self).order_by('-order').first()
    next_order = 0 if last is None else last.order + 1
    for offset, node in enumerate(nodes):
        NodeTraversal(node=Node.objects.get(name=node.name),
                      path=self, strand=node.orient,
                      order=next_order + offset).save()

def append_node(self, node: Node, path_index, strand: str):
    """Record that this path traverses `node` on `strand` at position `path_index`.

    This is the preferred way to build a graph in a truly non-linear way: a
    NodeTraversal row linking this path to the node is saved with `path_index` as its
    `order`. The node is returned so that callers which used the return value keep
    working.
    """
    # Fields are passed by keyword: positional model arguments are assigned in field
    # order starting with `id`, which would put every value in the wrong column.
    NodeTraversal(node=node, path=self, strand=strand, order=path_index).save()
    return node

@classmethod
def build(cls, name: str, seq_of_nodes: List[str]):
node = Node.objects.create(seq)
for p in paths:
NodeTraversal.objects.create(node, path)
# @classmethod
# def build(cls, name: str, seq_of_nodes: List[str]):
# node = Node.objects.create(seq)
# for p in paths:
# NodeTraversal.objects.create(node, path)

def name(self):
return self.accession
Expand All @@ -183,10 +178,11 @@ def to_gfa(self):

class NodeTraversal(models.Model):
"""Link from a Path to a Node it is currently traversing. Includes strand"""
node = models.ForeignKey(Node, index=True, on_delete=models.CASCADE)
path = models.ForeignKey(Path, index=True, on_delete=models.CASCADE, help_text='')
order = models.IntegerField(help_text='Defines the order a path lists traversals')
node = models.ForeignKey(Node, db_index=True, on_delete=models.CASCADE)
path = models.ForeignKey(Path, db_index=True, on_delete=models.CASCADE, help_text='')
strand = models.CharField(choices=[('+', '+'),('-', '-')], default='+', max_length=1)
order = models.IntegerField(help_text='Defines the order a path lists traversals') # set automatically

objects = CustomSaveManager()

def __repr__(self):
Expand All @@ -200,13 +196,13 @@ def __eq__(self, other):
return self.node.id == other.node.id and self.strand == other.strand

def save(self, **kwargs):
    """Save the traversal, filling in `order` when the caller did not provide one.

    A missing order becomes the largest `order` currently stored for this path plus 1,
    or 0 when the path has no traversals yet. An order that was set explicitly is
    kept unchanged.

    IMPORTANT NOTE (from the original author): save() does not get called if you do
    NodeTraversal.objects.create or get_or_create.
    """
    if self.order is None:
        # first() returns None for an empty path, so guard before reading .order.
        last = self.path.nodetraversal_set.all().order_by('-order').first()
        self.order = 0 if last is None else last.order + 1
    super(NodeTraversal, self).save(**kwargs)



class Graph:
def __init__(self, paths: Iterable = None):
# This can create orphan Nodes with no traversals
Expand Down Expand Up @@ -251,7 +247,7 @@ def save_as_xg(self, file: str, xg_bin: str):
gfa = GFA.from_graph(self)
gfa.save_as_xg(file, xg_bin)

def append_node_to_path(self, node_id, strand, path_name):
def append_node_to_path(self, node_id, strand, path_name, path_index):
"""This is the preferred way to build a graph in a truly non-linear way.
Nodes will be created if necessary.
NodeTraversal is appended to Path (order dependent) and PathIndex is added to Node
Expand All @@ -261,7 +257,7 @@ def append_node_to_path(self, node_id, strand, path_name):
self.nodes[node_id] = Node('', [], node_id)
else:
raise ValueError("Provide the id of the node, not", node_id)
self.paths[path_name].append_node(self.nodes[node_id], strand)
self.paths[path_name].append_node(self.nodes[node_id], path_index, strand)
#
# def compute_slices(self):
# """Alias: Upgrades a Graph to a SlicedGraph"""
Expand Down
Loading

0 comments on commit 3d558b6

Please sign in to comment.