From 91f21686d8b29c84d8d4b0f75b28b1008d7ed6cb Mon Sep 17 00:00:00 2001
From: Josiah Seaman <josiah@newline.us>
Date: Thu, 15 Aug 2019 11:45:44 +0100
Subject: [PATCH] Added comments and tweaks to respond to Simon's review
 comments.

---
 Graph/models.py              |  8 ++++++++
 HaploBlocker/haplonetwork.py | 19 ++++++++++++++-----
 HaploBlocker/tests.py        | 10 +++++-----
 README.md                    |  6 ++++--
 requirements_dev.txt         |  2 +-
 5 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/Graph/models.py b/Graph/models.py
index de46ab7..42312bc 100644
--- a/Graph/models.py
+++ b/Graph/models.py
@@ -225,17 +225,25 @@ def __eq__(self, representation):
         raise TypeError("Graphs can only compare with other Graphs", type(representation))
 
     def load_from_pickle(self, file: str):
+        """Pickle is a Python-specific file format that dumps the exact state of a Python
+        object from memory."""
         self = pickle.load(file)
 
     def load_from_xg(self, file: str, xg_bin: str):
+        """XG is a graph format used by VG (variation graph).  This method builds a
+        database GraphGenome to exactly mirror the contents of an XG file."""
         from Graph.gfa import GFA
         gfa = GFA.load_from_xg(file, xg_bin)
         self = gfa.to_graph()
 
     def save_as_pickle(self, file):
+        """Pickle is a Python-specific file format that dumps the exact state of a Python
+        object from memory."""
         pickle.dump(self, file)
 
     def save_as_xg(self, file: str, xg_bin: str):
+        """XG is a graph format used by VG (variation graph).  This method exports
+        a database GraphGenome as an XG file."""
         from Graph.gfa import GFA
         gfa = GFA.from_graph(self)
         gfa.save_as_xg(file, xg_bin)
diff --git a/HaploBlocker/haplonetwork.py b/HaploBlocker/haplonetwork.py
index bedec36..7ec3a81 100644
--- a/HaploBlocker/haplonetwork.py
+++ b/HaploBlocker/haplonetwork.py
@@ -17,8 +17,8 @@ def first(iterable):
 
 
 class Point:
-    def __init__(self, snp, bp=0):
-        self.snp, self.bp = snp, bp
+    def __init__(self, snp):
+        self.snp = snp
 
     @property
     def window(self):
@@ -27,7 +27,10 @@ def window(self):
 
 class Node:
     """This definition of Node is designed to be equivalent to the R code HaploBlocker Nodes.
-    It has no concept of strand, CNV.  It uses an absolute Start and End position, but those
+    This will be combined with the VG definition of Graph.models.Node and extended to support the
+    concept of summarization layers.
+
+    Currently, it has no concept of strand or CNV.  It uses an absolute Start and End position, but those
     are not referenced very often.
     Critically, it needs Node.NOTHING which is used frequently to mark specimens whose
     upstream or downstream nodes have been pruned.  The usage of Node.NOTHING is equivalent to
@@ -87,6 +90,12 @@ def is_end(self) -> bool:
         return len(self.downstream) == 1 and first(self.downstream).is_nothing()
 
 
+# Node.NOTHING is essentially "Not Applicable" when used to track transition rates between nodes.
+# Node.NOTHING is an important concept to HaploBlocker, used to track upstream and downstream
+# transitions to an unknown or untracked state.  As neglect_nodes removes minority
+# allele nodes, there will be specimens downstream that "come from" Node.NOTHING, meaning their
+# full history is no longer tracked.  Node.NOTHING is a regular exception case for missing data,
+# the ends of chromosomes, and the gaps between haplotype blocks.
 Node.NOTHING = Node(-1, Point(None), Point(None))
 
 
@@ -118,8 +127,8 @@ def get_unique_signatures(individuals, start_locus):
     for individual in individuals:
         sig = signature(individual, start_locus)
         if sig not in unique_blocks:
-            unique_blocks[sig] = Node(len(unique_blocks), Point(start_locus // BLOCK_SIZE, start_locus),
-                                      Point(start_locus // BLOCK_SIZE, start_locus + BLOCK_SIZE))  # TODO: -1?
+            unique_blocks[sig] = Node(len(unique_blocks), Point(start_locus // BLOCK_SIZE),
+                                      Point(start_locus // BLOCK_SIZE))  # TODO: -1?
     return unique_blocks
 
 
diff --git a/HaploBlocker/tests.py b/HaploBlocker/tests.py
index 47cd2c4..2576311 100644
--- a/HaploBlocker/tests.py
+++ b/HaploBlocker/tests.py
@@ -103,14 +103,14 @@ def test_split_one_group(self):
                      ['C', {a, b, d}, '', {c}],  # [3] repeated from [1] SNP
         """
         nodes = [
-            Node(91, Point(1), Point(1), {1,2,4}),
+            Node(91, Point(1), Point(1), {1, 2, 4}),
             Node(92, Point(1), Point(1), {3}),
-            Node(93, Point(2), Point(2), {1,2,3,4}),  # [2] anchor
-            Node(94, Point(3), Point(3), {1,2,4}),
+            Node(93, Point(2), Point(2), {1, 2, 3, 4}),  # [2] anchor
+            Node(94, Point(3), Point(3), {1, 2, 4}),
             Node(95, Point(3), Point(3), {3}),
             # additional bracketing to anchor
-            Node(90, Point(0), Point(0), {1,2,3,4}),
-            Node(96, Point(4), Point(4), {1,2,3,4})
+            Node(90, Point(0), Point(0), {1, 2, 3, 4}),
+            Node(96, Point(4), Point(4), {1, 2, 3, 4})
         ]
         # connections
         nodes[5].downstream[nodes[0]] = 3
diff --git a/README.md b/README.md
index 13bccff..7bcb24b 100644
--- a/README.md
+++ b/README.md
@@ -11,18 +11,20 @@ Browser for Graph Genomes built with VG.  Provides visualization for variation w
 # Developer Instructions
 **Environment**: [Anaconda 3.7 ](https://www.anaconda.com/distribution/)  
 Ggfapy etc. does not have an anaconda package, so it's necessary to use pip:  
-`pip install -r requirements.txt`  
+`pip install -r requirements_dev.txt`  
 
 
 **IDE:**  Pycharm Professional 2019.1 available for free for academics. 
 * Travis CI - automatically runs tests on master and development branches
 * Jupyter Notebook - run from the same Anaconda environment.  Notebooks are useful for prototyping, mature code gets moved to .py files for reuse.  They can be matured into a user manual.
 
-**Django:** This project uses django for its relational database.  db.sqlite3 is not included in the repo.  To setup Django database file run  
+**Django:** This project uses Django for its relational database.  db.sqlite3 is not included in the repo.  You will need a superuser to be able to browse the database in the administrator backend.  To set up the Django database file, run  
 ```
 python manage.py migrate
 python manage.py createsuperuser
 ```
+In development, changes to models.py require a new migration, created using  
+`python manage.py makemigrations`
 
 
 #### Branches
diff --git a/requirements_dev.txt b/requirements_dev.txt
index e69222f..e6632c4 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -6,4 +6,4 @@ numpy==1.16.2
 networkx==2.2
 
 
-# To setup Django database file run python manage.py migrate.  db.sqlite3 is not included in the repo.
\ No newline at end of file
+# See README.md for instructions to set up the Django database file.  db.sqlite3 is not included in the repo.