Skip to content
This repository has been archived by the owner on Mar 20, 2020. It is now read-only.

Commit

Permalink
Added comments and tweaks to respond to Simon's review comments.
Browse files Browse the repository at this point in the history
  • Loading branch information
josiahseaman committed Aug 15, 2019
1 parent 4d42721 commit 91f2168
Show file tree
Hide file tree
Showing 5 changed files with 32 additions and 13 deletions.
8 changes: 8 additions & 0 deletions Graph/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,17 +225,25 @@ def __eq__(self, representation):
raise TypeError("Graphs can only compare with other Graphs", type(representation))

def load_from_pickle(self, file):
    """Replace this object's state with the state of a pickled object.

    Pickle is a Python-specific format that dumps the exact state of a Python
    object from memory.

    :param file: an open binary file object (anything with ``read``), or a
        path to a pickle file that will be opened in binary mode.
    :raises TypeError: if the pickled object is not an instance of this
        object's class.
    """
    # SECURITY: unpickling can execute arbitrary code. Only load files that
    # come from a trusted source.
    if hasattr(file, "read"):
        loaded = pickle.load(file)
    else:
        with open(file, "rb") as handle:
            loaded = pickle.load(handle)

    if not isinstance(loaded, type(self)):
        raise TypeError("Pickle does not contain a compatible object", type(loaded))

    # Assigning to ``self`` would only rebind the local name and leave the
    # caller's object untouched, so copy the loaded state onto this instance.
    state = dict(vars(loaded))
    self.__dict__.clear()
    self.__dict__.update(state)

def load_from_xg(self, file: str, xg_bin: str):
    """Populate this graph from an XG file.

    XG is a graph format used by VG (variation graph). This method builds a
    database GraphGenome to exactly mirror the contents of an XG file.

    :param file: the XG file to read; passed straight to ``GFA.load_from_xg``.
    :param xg_bin: passed straight to ``GFA.load_from_xg``
        (presumably the path to the vg executable -- TODO confirm).
    """
    # Imported here rather than at module level, as in the original code.
    from Graph.gfa import GFA
    graph = GFA.load_from_xg(file, xg_bin).to_graph()

    # Assigning to ``self`` would only rebind the local name and leave the
    # caller's object untouched, so copy the converted graph's state onto
    # this instance instead.
    # NOTE(review): assumes to_graph() returns an object of the same class
    # as ``self`` -- confirm against Graph.gfa.
    state = dict(vars(graph))
    self.__dict__.clear()
    self.__dict__.update(state)

def save_as_pickle(self, file):
    """Dump the exact in-memory state of this object to a pickle file.

    Pickle is a Python-specific format that dumps the exact state of a Python
    object from memory.

    :param file: an open binary file object (anything with ``write``), or a
        path that will be created/overwritten in binary mode.
    """
    if hasattr(file, "write"):
        pickle.dump(self, file)
        return
    # A path was given: open it ourselves so the handle is always closed.
    with open(file, "wb") as handle:
        pickle.dump(self, handle)

def save_as_xg(self, file: str, xg_bin: str):
    """Export this database GraphGenome as an XG file.

    XG is a graph format used by VG (variation graph). The graph is first
    converted to a GFA object, which then writes the XG file.

    :param file: destination passed straight to ``GFA.save_as_xg``.
    :param xg_bin: passed straight to ``GFA.save_as_xg``
        (presumably the path to the vg executable -- TODO confirm).
    """
    # Imported here rather than at module level, as in the original code.
    from Graph.gfa import GFA
    GFA.from_graph(self).save_as_xg(file, xg_bin)
Expand Down
19 changes: 14 additions & 5 deletions HaploBlocker/haplonetwork.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ def first(iterable):


class Point:
def __init__(self, snp, bp=0):
self.snp, self.bp = snp, bp
def __init__(self, snp):
self.snp = snp

@property
def window(self):
Expand All @@ -27,7 +27,10 @@ def window(self):

class Node:
"""This definition of Node is designed to be equivalent to the R code HaploBlocker Nodes.
It has no concept of strand, CNV. It uses an absolute Start and End position, but those
This will be combined with the VG definition of Graph.models.Node and extended to support the
concept of summarization layers.
Currently, It has no concept of strand, CNV. It uses an absolute Start and End position, but those
are not referenced very often.
Critically, it needs Node.NOTHING which is used frequently to mark specimens whose
upstream or downstream nodes have been pruned. The usage of Node.NOTHING is equivalent to
Expand Down Expand Up @@ -87,6 +90,12 @@ def is_end(self) -> bool:
return len(self.downstream) == 1 and first(self.downstream).is_nothing()


# Node.NOTHING is essentially "Not Applicable" when used to track transition rates between nodes.
# Node.NOTHING is an important concept in HaploBlocker, used to track upstream and downstream
# transitions to an unknown or untracked state. As neglect_nodes removes minority
# allele nodes, there will be specimens downstream that "come from" Node.NOTHING, meaning their
# full history is no longer tracked. Node.NOTHING is a regular exception case for missing data,
# the ends of chromosomes, and the gaps between haplotype blocks.
Node.NOTHING = Node(-1, Point(None), Point(None))


Expand Down Expand Up @@ -118,8 +127,8 @@ def get_unique_signatures(individuals, start_locus):
for individual in individuals:
sig = signature(individual, start_locus)
if sig not in unique_blocks:
unique_blocks[sig] = Node(len(unique_blocks), Point(start_locus // BLOCK_SIZE, start_locus),
Point(start_locus // BLOCK_SIZE, start_locus + BLOCK_SIZE)) # TODO: -1?
unique_blocks[sig] = Node(len(unique_blocks), Point(start_locus // BLOCK_SIZE),
Point(start_locus // BLOCK_SIZE)) # TODO: -1?
return unique_blocks


Expand Down
10 changes: 5 additions & 5 deletions HaploBlocker/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,14 @@ def test_split_one_group(self):
['C', {a, b, d}, '', {c}], # [3] repeated from [1] SNP
"""
nodes = [
Node(91, Point(1), Point(1), {1,2,4}),
Node(91, Point(1), Point(1), {1, 2, 4}),
Node(92, Point(1), Point(1), {3}),
Node(93, Point(2), Point(2), {1,2,3,4}), # [2] anchor
Node(94, Point(3), Point(3), {1,2,4}),
Node(93, Point(2), Point(2), {1, 2, 3, 4}), # [2] anchor
Node(94, Point(3), Point(3), {1, 2, 4}),
Node(95, Point(3), Point(3), {3}),
# additional bracketing to anchor
Node(90, Point(0), Point(0), {1,2,3,4}),
Node(96, Point(4), Point(4), {1,2,3,4})
Node(90, Point(0), Point(0), {1, 2, 3, 4}),
Node(96, Point(4), Point(4), {1, 2, 3, 4})
]
# connections
nodes[5].downstream[nodes[0]] = 3
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,20 @@ Browser for Graph Genomes built with VG. Provides visualization for variation w
# Developer Instructions
**Environment**: [Anaconda 3.7 ](https://www.anaconda.com/distribution/)
gfapy etc. do not have Anaconda packages, so it's necessary to use pip:
`pip install -r requirements.txt`
`pip install -r requirements_dev.txt`


**IDE:** Pycharm Professional 2019.1 available for free for academics.
* Travis CI - automatically runs tests on master and development branches
* Jupyter Notebook - run from the same Anaconda environment. Notebooks are useful for prototyping, mature code gets moved to .py files for reuse. They can be matured into a user manual.

**Django:** This project uses django for its relational database. db.sqlite3 is not included in the repo. To setup Django database file run
**Django:** This project uses Django for its relational database. db.sqlite3 is not included in the repo. You will need a superuser to be able to browse the database in the administrator backend. To set up the Django database file, run
```
python manage.py migrate
python manage.py createsuperuser
```
In development, changes to models.py require a new migration, created using
`python manage.py makemigrations`


#### Branches
Expand Down
2 changes: 1 addition & 1 deletion requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ numpy==1.16.2
networkx==2.2


# To setup Django database file run python manage.py migrate. db.sqlite3 is not included in the repo.
# See README.md for instructions to set up the Django database file. db.sqlite3 is not included in the repo.

0 comments on commit 91f2168

Please sign in to comment.