From 13d52b965d517717004619c7865e18f2ce17dc83 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Wed, 9 Sep 2020 15:03:20 +0100 Subject: [PATCH 01/15] License autodection --- .gitignore | 2 +- LICENCE.txt | 14 +++++--------- setup.py | 2 +- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/.gitignore b/.gitignore index 2692693..513a9c4 100755 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ nosetests.xml .pydevproject # Temp files - +.vscode/ *~ # Pipy codes diff --git a/LICENCE.txt b/LICENCE.txt index f7a40d9..ace49c0 100755 --- a/LICENCE.txt +++ b/LICENCE.txt @@ -1,8 +1,4 @@ - -The MIT License (MIT) -[OSI Approved License] - -The MIT License (MIT) +MIT License Copyright (c) 2017 Edinburgh Genome Foundry @@ -13,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/setup.py b/setup.py index 4eec99b..5228aad 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,7 @@ description="DNA overhangs design for Golden Gate etc.", url="https://github.com/Edinburgh-Genome-Foundry/GoldenHinges", long_description=open("pypi-readme.rst").read(), - license="see LICENSE.txt", + license="MIT", keywords="DNA assembly overhangs constraint-programming synthetic-biology", packages=find_packages(exclude="docs"), install_requires=[ From d43feb2d9386d1b8afc61783470b689ee519bc0f Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Wed, 9 Sep 2020 15:37:25 +0100 Subject: [PATCH 02/15] Biopython v1.78 fix --- goldenhinges/biotools.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/goldenhinges/biotools.py b/goldenhinges/biotools.py index f728bbc..5ee9336 100755 --- a/goldenhinges/biotools.py +++ b/goldenhinges/biotools.py @@ -1,12 +1,20 @@ import itertools as itt from copy import deepcopy from functools import lru_cache +import numpy as np from Bio.SeqFeature import FeatureLocation, SeqFeature from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Bio.Alphabet import DNAAlphabet from Bio import SeqIO -import numpy as np + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False complements = {"A": "T", "T": "A", "C": "G", "G": "C"} @@ -101,9 +109,7 @@ def crop_record(record, crop_start, crop_end, features_suffix=" (part)"): new_start, new_end = new_start - crop_start, new_end - crop_start feature = deepcopy(feature) - feature.location = FeatureLocation( - new_start, new_end, feature.location.strand - ) + feature.location = FeatureLocation(new_start, new_end, feature.location.strand) label = "".join(feature.qualifiers.get("label", "")) feature.qualifiers["label"] = label + features_suffix features.append(feature) @@ -133,11 +139,7 @@ def sequences_differences_segments(seq1, seq2): def annotate_record( - seqrecord, - location="full", - feature_type="misc_feature", - margin=0, - **qualifiers + seqrecord, location="full", feature_type="misc_feature", margin=0, **qualifiers ): """Add a feature to a Biopython SeqRecord. @@ -175,12 +177,12 @@ def annotate_record( def sequence_to_biopython_record( sequence, id="", name="", features=() ): - return SeqRecord( - Seq(sequence, alphabet=DNAAlphabet()), - id=id, - name=name, - features=list(features), - ) + if has_dna_alphabet: # Biopython <1.78 + sequence = Seq(sequence, alphabet=DNAAlphabet()) + else: + sequence = Seq(sequence) + + return SeqRecord(sequence, id=id, name=name, features=list(features),) def load_record(filename, linear=True, name="unnamed", fmt="auto"): From 46a819512aef7b382fcb8b40ff2ac225d81afb7a Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Wed, 9 Sep 2020 16:42:02 +0100 Subject: [PATCH 03/15] Biopython v1.78 fix --- goldenhinges/biotools.py | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/goldenhinges/biotools.py b/goldenhinges/biotools.py index f728bbc..549522e 100755 --- a/goldenhinges/biotools.py +++ b/goldenhinges/biotools.py @@ -1,12 +1,20 @@ import itertools as itt from copy import deepcopy from functools import lru_cache +import numpy as np from Bio.SeqFeature import FeatureLocation, SeqFeature from Bio.Seq import Seq from Bio.SeqRecord import SeqRecord -from Bio.Alphabet import DNAAlphabet from Bio import SeqIO -import numpy as np + +try: + # Biopython <1.78 + from Bio.Alphabet import DNAAlphabet + + has_dna_alphabet = True +except ImportError: + # Biopython >=1.78 + has_dna_alphabet = False complements = {"A": "T", "T": "A", "C": "G", "G": "C"} @@ -101,9 +109,7 @@ def crop_record(record, crop_start, crop_end, features_suffix=" (part)"): new_start, new_end = new_start - crop_start, new_end - crop_start feature = deepcopy(feature) - feature.location = FeatureLocation( - new_start, new_end, feature.location.strand - ) + feature.location = FeatureLocation(new_start, new_end, feature.location.strand) label = "".join(feature.qualifiers.get("label", "")) feature.qualifiers["label"] = label + features_suffix features.append(feature) @@ -133,11 +139,7 @@ def sequences_differences_segments(seq1, seq2): def annotate_record( - seqrecord, - location="full", - feature_type="misc_feature", - margin=0, - **qualifiers + seqrecord, location="full", feature_type="misc_feature", margin=0, **qualifiers ): """Add a feature to a Biopython SeqRecord. @@ -175,12 +177,15 @@ def annotate_record( def sequence_to_biopython_record( sequence, id="", name="", features=() ): - return SeqRecord( - Seq(sequence, alphabet=DNAAlphabet()), - id=id, - name=name, - features=list(features), - ) + if has_dna_alphabet: # Biopython <1.78 + sequence = Seq(sequence, alphabet=DNAAlphabet()) + else: + sequence = Seq(sequence) + + seqrecord = SeqRecord(sequence, id=id, name=name, features=list(features),) + seqrecord.annotations["molecule_type"] = "DNA" + + return seqrecord def load_record(filename, linear=True, name="unnamed", fmt="auto"): From 3113595430a0e11a2f43a271cb259bf7fa27fcf2 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 11:34:35 +0100 Subject: [PATCH 04/15] SeqRecord annotations DNA added --- goldenhinges/reports.py | 42 +++++++++++------------------------------ 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/goldenhinges/reports.py b/goldenhinges/reports.py index 6533a8a..1bdb619 100755 --- a/goldenhinges/reports.py +++ b/goldenhinges/reports.py @@ -31,12 +31,7 @@ def new_sequence_from_cutting_solution(solution, sequence): def write_report_for_cutting_solution( - solution, - target, - sequence, - left_flank="", - right_flank="", - display_positions=False, + solution, target, sequence, left_flank="", right_flank="", display_positions=False, ): """Write a complete report for Type IIS arbitrary sequence assembly. @@ -108,26 +103,19 @@ def write_report_for_cutting_solution( gr = translator.translate_record(plot_record) ax, _ = gr.plot(with_ruler=False, figure_width=max(8, len(solution) / 2)) ax.set_title( - "Selected overhangs", - loc="left", - fontdict=dict(weight="bold", fontsize=13), + "Selected overhangs", loc="left", fontdict=dict(weight="bold", fontsize=13), ) # ax.figure.set_size_inches((max(8, 0.7*len(o)), 2)) ax.set_ylim(top=ax.get_ylim()[1] + 2) xx = [x for (a, b) in edited_segments for x in range(a, b)] - ax.plot( - xx, [0 for x in xx], marker="o", c="r", lw=0, label="sequence edits" - ) + ax.plot(xx, [0 for x in xx], marker="o", c="r", lw=0, label="sequence edits") L = len(sequence) ax.set_xlim(-0.1 * L, 1.1 * L) ax.legend(loc=2, fontsize=12) locs = sorted([o["location"] for o in solution]) diffs = np.diff(locs) - text = "Segment size: %d +/- %d bp. (mean +/- 1std)" % ( - diffs.mean(), - diffs.std(), - ) + text = "Segment size: %d +/- %d bp. (mean +/- 1std)" % (diffs.mean(), diffs.std(),) ax.text( L / 2, -1, @@ -137,9 +125,7 @@ def write_report_for_cutting_solution( fontsize=14, ) ax.figure.savefig( - root._file("summary_plot.pdf").open("wb"), - format="pdf", - bbox_inches="tight", + root._file("summary_plot.pdf").open("wb"), format="pdf", bbox_inches="tight", ) plt.close(ax.figure) @@ -148,9 +134,7 @@ def write_report_for_cutting_solution( report_record = deepcopy(record) report_record.seq = sequence_to_biopython_record(new_sequence).seq for (start, end) in edited_segments: - annotate_record( - report_record, (int(start), int(end), 0), label="!edited" - ) + annotate_record(report_record, (int(start), int(end), 0), label="!edited") for o in solution: start = int(o["location"]) end = int(o["location"] + len(o["sequence"])) @@ -163,9 +147,7 @@ def write_report_for_cutting_solution( fragments_records_dir = root._dir("fragments_records") overhang_length = len(solution[0]["sequence"]) if solution[0]["location"] != 0: - solution = [ - {"location": 0, "sequence": sequence[:overhang_length]} - ] + solution + solution = [{"location": 0, "sequence": sequence[:overhang_length]}] + solution if solution[-1]["location"] != L - overhang_length: solution = solution + [ { @@ -178,14 +160,12 @@ def write_report_for_cutting_solution( start, end = o1["location"], o2["location"] + len(o2["sequence"]) fragment = crop_record(report_record, start, end) seqrecord = left_flank + fragment + right_flank - SeqIO.write( - seqrecord, fragments_records_dir._file(seqname + ".gb"), "genbank" - ) + seqrecord.annotations["molecule_type"] = "DNA" + + SeqIO.write(seqrecord, fragments_records_dir._file(seqname + ".gb"), "genbank") sequences.append(";".join([seqname, str(seqrecord.seq)])) root._file("fragments_sequences.csv").write("\n".join(sequences)) - root._file("overhangs_list.csv").write( - ", ".join([o["sequence"] for o in solution]) - ) + root._file("overhangs_list.csv").write(", ".join([o["sequence"] for o in solution])) return root._close() From 028e6c36257cc57fe5e50dbc8ca3ed78347627c9 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 11:37:01 +0100 Subject: [PATCH 05/15] Bio v1.78 fix --- goldenhinges/biotools.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/goldenhinges/biotools.py b/goldenhinges/biotools.py index c3a27ae..549522e 100755 --- a/goldenhinges/biotools.py +++ b/goldenhinges/biotools.py @@ -182,14 +182,10 @@ def sequence_to_biopython_record( else: sequence = Seq(sequence) -<<<<<<< HEAD seqrecord = SeqRecord(sequence, id=id, name=name, features=list(features),) seqrecord.annotations["molecule_type"] = "DNA" return seqrecord -======= - return SeqRecord(sequence, id=id, name=name, features=list(features),) ->>>>>>> d43feb2d9386d1b8afc61783470b689ee519bc0f def load_record(filename, linear=True, name="unnamed", fmt="auto"): From 2cf2edbb0f3d1ee0b1afc821e320082d2b79d3fd Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 15:07:13 +0100 Subject: [PATCH 06/15] Black --- tests/test_basics.py | 81 ++++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 37 deletions(-) diff --git a/tests/test_basics.py b/tests/test_basics.py index f5611b9..e771261 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -4,26 +4,27 @@ import os import itertools import numpy as np -from goldenhinges import (OverhangsSelector, list_overhangs, gc_content, - sequences_differences, reverse_complement) -from dnachisel import (random_dna_sequence, sequence_to_biopython_record, - annotate_record) +from goldenhinges import ( + OverhangsSelector, + list_overhangs, + gc_content, + sequences_differences, + reverse_complement, +) +from dnachisel import random_dna_sequence, sequence_to_biopython_record, annotate_record import pytest @pytest.fixture def data(): - data_path = os.path.join('tests', 'test_data') + data_path = os.path.join("tests", "test_data") with open(os.path.join(data_path, "phage_sequence.txt"), "r") as f: phage_sequence = f.read() - return { - "phage_sequence": phage_sequence - } + return {"phage_sequence": phage_sequence} def test_generate_overhangs_collection(): - selector = OverhangsSelector(gc_min=0.5, gc_max=0.5, - differences=2, time_limit=2) + selector = OverhangsSelector(gc_min=0.5, gc_max=0.5, differences=2, time_limit=2) collection = selector.generate_overhangs_set(n_overhangs=18, n_cliques=100) collection = selector.generate_overhangs_set(start_at=len(collection)) assert len(collection) == 24 @@ -33,61 +34,68 @@ def test_generate_overhangs_collection(): def test_generate_overhangs_collection2(): - selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, - differences=2, time_limit=2) + selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, differences=2, time_limit=2) collection = selector.generate_overhangs_set() assert len(collection) >= 24 for o1, o2 in itertools.combinations(collection, 2): assert sequences_differences(o1, o2) >= 2 assert sequences_differences(o1, reverse_complement(o2)) >= 2 + def test_generate_overhangs_collection_with_possible(): - selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, - differences=1, - possible_overhangs=['ATTC', 'AAAA', 'GAAT', - 'CTCA'], - time_limit=2) + selector = OverhangsSelector( + gc_min=0.25, + gc_max=0.75, + differences=1, + possible_overhangs=["ATTC", "AAAA", "GAAT", "CTCA"], + time_limit=2, + ) collection = selector.generate_overhangs_set() assert len(collection) == 2 + def test_cut_sequence_into_similar_lengths(data): def invalid_overhang(overhang): gc = gc_content(overhang) - three_gc = max([gc_content(overhang[:-1]), - gc_content(overhang[1:])]) == 1 + three_gc = max([gc_content(overhang[:-1]), gc_content(overhang[1:])]) == 1 return (gc != 0.5) and (three_gc or (gc != 0.75)) forbidden_overhangs = list_overhangs(filters=[invalid_overhang]) - selector = OverhangsSelector(forbidden_overhangs=forbidden_overhangs, - differences=1, time_limit=2) + selector = OverhangsSelector( + forbidden_overhangs=forbidden_overhangs, differences=1, time_limit=2 + ) sequence = data["phage_sequence"] solution = selector.cut_sequence( - sequence, equal_segments=50, max_radius=20, include_extremities=False) - indices = [o['location'] for o in solution] + sequence, equal_segments=50, max_radius=20, include_extremities=False + ) + indices = [o["location"] for o in solution] diffs = np.diff([0] + indices + [len(sequence)]) assert len(diffs) == 50 assert int(diffs.mean()) == 970 + def test_from_record(): seq = random_dna_sequence(7202, seed=123) record = sequence_to_biopython_record(seq) zone = (1900, len(seq) - 1900) - annotate_record(record, location=zone, - label="Gene: acs", color='#8edfff') - annotate_record(record, location=zone, - label="@EnforceTranslation") - annotate_record(record, location=(zone[0]-1800, zone[0], 0), - label="@AvoidChanges") - annotate_record(record, location=(zone[1], 1800 + zone[1], 0), - label="@AvoidChanges") + annotate_record(record, location=zone, label="Gene: acs", color="#8edfff") + annotate_record(record, location=zone, label="@EnforceTranslation") + annotate_record( + record, location=(zone[0] - 1800, zone[0], 0), label="@AvoidChanges" + ) + annotate_record( + record, location=(zone[1], 1800 + zone[1], 0), label="@AvoidChanges" + ) # ADD SEMI-RANDOM CUTTING ZONES cut_region_size = 70 zones = [ - (x + int(200*np.sin(x)), - x + cut_region_size + int(200*np.sin(x) - 50*np.cos(x)), - 0) + ( + x + int(200 * np.sin(x)), + x + cut_region_size + int(200 * np.sin(x) - 50 * np.cos(x)), + 0, + ) for x in range(50, len(seq), 1030)[1:] ] for zone in zones: @@ -96,6 +104,5 @@ def test_from_record(): # SOLVE PROBLEM selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, differences=2) - solution = selector.cut_sequence(record, allow_edits=True, - include_extremities=True) - assert (solution is not None) + solution = selector.cut_sequence(record, allow_edits=True, include_extremities=True) + assert solution is not None From d0acbe3700ff2b34cf9091062c980e1964a7ddc7 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 15:24:28 +0100 Subject: [PATCH 07/15] Tests for OverhangSetOptimizer --- goldenhinges/OverhangSetOptimizer.py | 28 +++++++++++++++++++++++++--- tests/test_basics.py | 24 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/goldenhinges/OverhangSetOptimizer.py b/goldenhinges/OverhangSetOptimizer.py index 234e3a7..a36d838 100644 --- a/goldenhinges/OverhangSetOptimizer.py +++ b/goldenhinges/OverhangSetOptimizer.py @@ -3,6 +3,30 @@ class OverhangSetOptimizer: + """A class for choosing a set of overhangs for Golden-Gate assembly and others. + + The selected overhangs are in the `selected_overhangs` attribute. + + + Parameters + ---------- + + set_size + Number of overhangs to choose. + + possible_overhangs + List of a few overhangs the collection should be chosen from. + + external_overhangs + List of overhangs that all selected overhangs should be compatible with. + + initial_set + An initial set of overhangs to start the optimization with. + + mutations + Number of overhangs to remove/add at each iteration of optimization. + """ + def __init__( self, set_size, @@ -62,9 +86,7 @@ def optimize(self, iterations=100): added = np.random.choice( list(self.leftover_overhangs), self.mutations, replace=False ) - new_overhangs = self.selected_overhangs.difference(removed).union( - added - ) + new_overhangs = self.selected_overhangs.difference(removed).union(added) new_score = self.score(new_overhangs, self.external_overhangs) if new_score > self.current_score: self.selected_overhangs.update(added) diff --git a/tests/test_basics.py b/tests/test_basics.py index e771261..72eec5c 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -10,6 +10,7 @@ gc_content, sequences_differences, reverse_complement, + OverhangSetOptimizer, ) from dnachisel import random_dna_sequence, sequence_to_biopython_record, annotate_record import pytest @@ -106,3 +107,26 @@ def test_from_record(): selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, differences=2) solution = selector.cut_sequence(record, allow_edits=True, include_extremities=True) assert solution is not None + + +def test_overhangsetoptimizer(): + number_of_required_overhangs = 4 + optimizer = OverhangSetOptimizer( + set_size=number_of_required_overhangs, + possible_overhangs=[ + "TAGG", + "ATGG", + "GACT", + "GGAC", + "TCCG", + "CCAG", + "AAAA", + "TTTT", + ], + external_overhangs=["TAGG", "ACTG"], + ) + assert len(optimizer.selected_overhangs) == number_of_required_overhangs + assert ( + len(optimizer.selected_overhangs & set(optimizer.possible_overhangs)) + == number_of_required_overhangs + ) From 6ef419d0dd2f494e065aefbbddf33286e09a2fcb Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 15:28:42 +0100 Subject: [PATCH 08/15] Fixed test Sometimes solution len >= 24 is not found within 2s; increased time limit and decreased requirement. --- tests/test_basics.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_basics.py b/tests/test_basics.py index 72eec5c..b9cc8c0 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -35,9 +35,9 @@ def test_generate_overhangs_collection(): def test_generate_overhangs_collection2(): - selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, differences=2, time_limit=2) + selector = OverhangsSelector(gc_min=0.25, gc_max=0.75, differences=2, time_limit=3) collection = selector.generate_overhangs_set() - assert len(collection) >= 24 + assert len(collection) >= 22 for o1, o2 in itertools.combinations(collection, 2): assert sequences_differences(o1, o2) >= 2 assert sequences_differences(o1, reverse_complement(o2)) >= 2 From b460ded20e5599836043100c402c4da93076f160 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 15:32:05 +0100 Subject: [PATCH 09/15] OverhangSetOptimizer.optimize() test --- goldenhinges/OverhangSetOptimizer.py | 5 +++-- tests/test_basics.py | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/goldenhinges/OverhangSetOptimizer.py b/goldenhinges/OverhangSetOptimizer.py index a36d838..1e3be41 100644 --- a/goldenhinges/OverhangSetOptimizer.py +++ b/goldenhinges/OverhangSetOptimizer.py @@ -3,9 +3,10 @@ class OverhangSetOptimizer: - """A class for choosing a set of overhangs for Golden-Gate assembly and others. + """A class for choosing a set of overhangs for Golden-Gate assembly. - The selected overhangs are in the `selected_overhangs` attribute. + Run method `optimize` to optimize the overhang selection. The selected + overhangs are in the `selected_overhangs` attribute. Parameters diff --git a/tests/test_basics.py b/tests/test_basics.py index b9cc8c0..51007f7 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -125,6 +125,8 @@ def test_overhangsetoptimizer(): ], external_overhangs=["TAGG", "ACTG"], ) + optimizer.optimize(iterations=100) + assert len(optimizer.selected_overhangs) == number_of_required_overhangs assert ( len(optimizer.selected_overhangs & set(optimizer.possible_overhangs)) From b8d05231f1f08eb4ea0117ac85e2f8c64fce5870 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 15:53:23 +0100 Subject: [PATCH 10/15] Test find_compatible_overhangs() --- tests/test_basics.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test_basics.py b/tests/test_basics.py index 51007f7..4b0bf51 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -12,6 +12,7 @@ reverse_complement, OverhangSetOptimizer, ) +from goldenhinges.clique_methods import find_compatible_overhangs from dnachisel import random_dna_sequence, sequence_to_biopython_record, annotate_record import pytest @@ -132,3 +133,7 @@ def test_overhangsetoptimizer(): len(optimizer.selected_overhangs & set(optimizer.possible_overhangs)) == number_of_required_overhangs ) + + +def test_find_compatible_overhangs(): + assert find_compatible_overhangs(n_solutions_considered=5, randomize=True) From d51ac862d058cb19eb3f93afb47b8ce080e95dec Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 16:20:08 +0100 Subject: [PATCH 11/15] Test load_record() --- tests/test_basics.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/test_basics.py b/tests/test_basics.py index 4b0bf51..a5c233a 100755 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -4,6 +4,7 @@ import os import itertools import numpy as np +import Bio from goldenhinges import ( OverhangsSelector, list_overhangs, @@ -11,7 +12,9 @@ sequences_differences, reverse_complement, OverhangSetOptimizer, + load_record, ) +from goldenhinges.biotools import sequences_differences_array from goldenhinges.clique_methods import find_compatible_overhangs from dnachisel import random_dna_sequence, sequence_to_biopython_record, annotate_record import pytest @@ -137,3 +140,23 @@ def test_overhangsetoptimizer(): def test_find_compatible_overhangs(): assert find_compatible_overhangs(n_solutions_considered=5, randomize=True) + + +def test_sequences_differences_array(): + with pytest.raises(ValueError): + sequences_differences_array("AAA", "AAAT") + # Only use on same-size sequences (3, 4) + + +def test_load_record(): + with pytest.raises(ValueError): + load_record("seq.asd") # wrong extension + + seq_path = os.path.join("tests", "test_data", "sequence.gb") + record_name = "Name longer than 20characters" + record = load_record(filename=seq_path, name=record_name) + assert type(record) == Bio.SeqRecord.SeqRecord + assert record.id == record_name + assert record.name == "Name_longer_than_20c" + + assert type(load_record(filename=seq_path, fmt="gb")) == Bio.SeqRecord.SeqRecord From 7faf19af2c22078e5b14407fa6a26762c75644f0 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 16:28:30 +0100 Subject: [PATCH 12/15] Readme update + Coveralls badge --- README.rst | 26 +++++++++++++++----------- pypi-readme.rst | 9 +++++---- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/README.rst b/README.rst index 6c5b5c3..2f5513f 100755 --- a/README.rst +++ b/README.rst @@ -10,6 +10,10 @@ :target: https://travis-ci.org/Edinburgh-Genome-Foundry/GoldenHinges :alt: Travis CI build status +.. image:: https://coveralls.io/repos/github/Edinburgh-Genome-Foundry/GoldenHinges/badge.svg?branch=master + :target: https://coveralls.io/github/Edinburgh-Genome-Foundry/GoldenHinges?branch=master + + Golden Hinges (full documentation `here `_) is a Python library to find sets of overhangs (also called junctions, or protusions) for multipart DNA assembly such as Golden Gate assembly. @@ -24,14 +28,14 @@ mandatory and forbidden overhangs) Golden Hinges enables to find: decomposition, in exterme cases where the original sequence does not allow for such decomposition. -You can see Golden Hinges in action in this -`web demo `_: +You can see Golden Hinges in action in this web demo: +`Design Golden Gate Overhangs `_ Examples of use ----------------- +--------------- -Finding maximal overhangs sets -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Finding maximal overhang sets +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Let us compute a collection of overhangs, as large as possible, where @@ -91,8 +95,8 @@ attempt to find larger sets: Using experimental annealing data from Potapov 2018 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -`This study by Potapov *et al.* `_ -provides insightful data on overhangs annealing, in particular which overhangs +`This study by Potapov et al. `_ +provides insightful data on overhang annealing, in particular which overhangs have weak general annealing power, and which pairs of overhangs have significant "cross-talk". You can use the data in this paper via the Python `tatapov `_ library @@ -183,7 +187,7 @@ Note that solutions involving base changes are penalized and solutions involving the original solution will always be prefered, so no base change will be suggested unless strictly necessary. -If the input record has `DnaChisel `_ +If the input record has `DNA Chisel `_ annotations such as ``@AvoidChanges`` or ``@EnforceTranslation``, these will be enforced to forbid some mutations. @@ -228,11 +232,11 @@ using this command: -Contribute ! --------------- +Contribute! +----------- Golden Hinges is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ -under the MIT licence. Everyone is welcome to contribute ! +under the MIT licence. Everyone is welcome to contribute! diff --git a/pypi-readme.rst b/pypi-readme.rst index 5541443..e5c8a2a 100644 --- a/pypi-readme.rst +++ b/pypi-readme.rst @@ -1,6 +1,7 @@ Golden Hinges -============== -Golden Hinges (full documentation `here `_) is a Python library to find sets of overhangs (also called junctions, or protusions) for multipart DNA assembly such as Golden Gate assembly. +============= + +Golden Hinges (full documentation `here `_) is a Python library to find sets of overhangs (also called junctions, or protrusions) for multipart DNA assembly such as Golden Gate assembly. Given a set of constraints (GC content bounds, differences between overhangs, mandatory and forbidden overhangs) Golden Hinges enables to find: @@ -29,14 +30,14 @@ Infos ``_ -**Live demo** +**Live demo:** ``_ **License:** MIT, Copyright Edinburgh Genome Foundry More biology software ------------------------ +--------------------- .. image:: https://raw.githubusercontent.com/Edinburgh-Genome-Foundry/Edinburgh-Genome-Foundry.github.io/master/static/imgs/logos/egf-codon-horizontal.png :target: https://edinburgh-genome-foundry.github.io/ From 84636d239353f9dc3d7a40603dcb41a7fd321e55 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 16:31:14 +0100 Subject: [PATCH 13/15] v0.2.2 --- goldenhinges/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/goldenhinges/version.py b/goldenhinges/version.py index 3ced358..b5fdc75 100755 --- a/goldenhinges/version.py +++ b/goldenhinges/version.py @@ -1 +1 @@ -__version__ = "0.2.1" +__version__ = "0.2.2" From bacd2f14fd20a1c25bac19d51b0353022fa10b2c Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 16:37:54 +0100 Subject: [PATCH 14/15] Doc html version string fix --- docs/conf.py | 202 +++++++++++++++++++++++++++------------------------ 1 file changed, 106 insertions(+), 96 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 795a85d..e0dbd87 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -16,75 +16,80 @@ # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'numpydoc'] +extensions = [ + "sphinx.ext.autodoc", + "sphinx.ext.todo", + "sphinx.ext.viewcode", + "numpydoc", +] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix of source filenames. -source_suffix = ['.rst'] +source_suffix = [".rst"] # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = u'GoldenHinges' -copyright = u'2017, Edinburgh Genome Foundry' +project = u"GoldenHinges" +copyright = u"2017, Edinburgh Genome Foundry" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. -version = '0.1.0' +version = "0.2.2" # The full version, including alpha/beta/rc tags. -release = '0.1.0' +release = "0.2.2" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. -#language = None +# language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # -- Options for HTML output --------------------------------------------------- @@ -92,37 +97,41 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. -on_rtd = os.environ.get('READTHEDOCS', None) == 'True' +on_rtd = os.environ.get("READTHEDOCS", None) == "True" if not on_rtd: # only import and set the theme if we're building docs locally import sphinx_rtd_theme - html_theme = 'sphinx_rtd_theme' + + html_theme = "sphinx_rtd_theme" html_theme_path = sphinx_rtd_theme.get_html_theme_path() + def setup(app): - app.add_stylesheet('css/main.css') + app.add_stylesheet("css/main.css") + + else: html_context = { - 'css_files': [ - 'https://media.readthedocs.org/css/sphinx_rtd_theme.css', - 'https://media.readthedocs.org/css/readthedocs-doc-embed.css', - '_static/css/main.css', - ], + "css_files": [ + "https://media.readthedocs.org/css/sphinx_rtd_theme.css", + "https://media.readthedocs.org/css/readthedocs-doc-embed.css", + "_static/css/main.css", + ], } -#sys.path.append(os.path.abspath('_themes')) +# sys.path.append(os.path.abspath('_themes')) # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. @@ -136,105 +145,99 @@ def setup(app): # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Output file base name for HTML help builder. -htmlhelp_basename = 'GoldenHingesdoc' +htmlhelp_basename = "GoldenHingesdoc" # -- Options for LaTeX output -------------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ - ('index', 'GoldenHinges.tex', u'GoldenHinges Documentation', - u'Zulko', 'manual'), + ("index", "GoldenHinges.tex", u"GoldenHinges Documentation", u"Zulko", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'GoldenHinges', u'PACKAGE_NAME Documentation', - [u'Zulko'], 1) -] +man_pages = [("index", "GoldenHinges", u"PACKAGE_NAME Documentation", [u"Zulko"], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ @@ -243,79 +246,86 @@ def setup(app): # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - ('index', 'GoldenHinges', u'GoldenHinges Documentation', - u'Zulko', 'GoldenHinges', 'One line description of project.', - 'Miscellaneous'), + ( + "index", + "GoldenHinges", + u"GoldenHinges Documentation", + u"Zulko", + "GoldenHinges", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False # -- Options for Epub output --------------------------------------------------- # Bibliographic Dublin Core info. -epub_title = u'GoldenHinges' -epub_author = u'Zulko' -epub_publisher = u'Zulko' -epub_copyright = u'2017, Edinburgh Genome Foundry' +epub_title = u"GoldenHinges" +epub_author = u"Zulko" +epub_publisher = u"Zulko" +epub_copyright = u"2017, Edinburgh Genome Foundry" # The language of the text. It defaults to the language option # or en if the language is not set. -#epub_language = '' +# epub_language = '' # The scheme of the identifier. Typical schemes are ISBN or URL. -#epub_scheme = '' +# epub_scheme = '' # The unique identifier of the text. This can be a ISBN number # or the project homepage. -#epub_identifier = '' +# epub_identifier = '' # A unique identification for the text. -#epub_uid = '' +# epub_uid = '' # A tuple containing the cover image and cover page html template filenames. -#epub_cover = () +# epub_cover = () # A sequence of (type, uri, title) tuples for the guide element of content.opf. -#epub_guide = () +# epub_guide = () # HTML files that should be inserted before the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_pre_files = [] +# epub_pre_files = [] # HTML files shat should be inserted after the pages created by sphinx. # The format is a list of tuples containing the path and title. -#epub_post_files = [] +# epub_post_files = [] # A list of files that should not be packed into the epub file. -#epub_exclude_files = [] +# epub_exclude_files = [] # The depth of the table of contents in toc.ncx. -#epub_tocdepth = 3 +# epub_tocdepth = 3 # Allow duplicate toc entries. -#epub_tocdup = True +# epub_tocdup = True # Fix unsupported image types using the PIL. -#epub_fix_images = False +# epub_fix_images = False # Scale large images. -#epub_max_image_width = 0 +# epub_max_image_width = 0 # If 'no', URL addresses will not be shown. -#epub_show_urls = 'inline' +# epub_show_urls = 'inline' # If false, no index is generated. -#epub_use_index = True +# epub_use_index = True + +# autodoc_member_order = 'bysource' -#autodoc_member_order = 'bysource' From 49a615e6f4592a73ddc616eb8c54d2924f4a9b00 Mon Sep 17 00:00:00 2001 From: Peter Vegh Date: Thu, 10 Sep 2020 16:47:04 +0100 Subject: [PATCH 15/15] Documentation typo fixes --- docs/index.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 84f87bf..d7c1e3a 100755 --- a/docs/index.rst +++ b/docs/index.rst @@ -83,7 +83,7 @@ attempt to find larger sets: Finding a sequence decomposition -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In this example, we find where to cut a 50-kilobasepair sequence to create @@ -91,8 +91,8 @@ assemblable fragments with 4-basepair overhangs. We indicate that: - There should be 50 fragments, with a minimum of variance in their sizes. - The fragments overhangs should have 25-75 GC% with a 1-basepair difference - between any two overhangs (and their reverse-complement). They should also be - compatible with the 4-basepair extremities of the sequence. + between any two overhangs (and their reverse-complement). They should also be + compatible with the 4-basepair extremities of the sequence. .. code:: python @@ -104,7 +104,7 @@ assemblable fragments with 4-basepair overhangs. We indicate that: solution = selector.cut_sequence(sequence, equal_segments=50, max_radius=20, include_extremities=True) -This returns a list of dictionnaries, each representing one overhang with +This returns a list of dictionaries, each representing one overhang with properties ``o['location']`` (coordinate of the overhang in the sequence) and ``o['sequence']`` (sequence of the overhang). @@ -123,7 +123,7 @@ the overhang's positions, using the following function: display_positions=False) Sequence mutation and decomposition from a Genbank file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the input sequence is a Genbank record (or a Biopython record) has locations annotated vy features feature labeled ``!cut``, GoldenHinges will attempt to @@ -135,7 +135,7 @@ Note that solutions involving base changes are penalized and solutions involving the original solution will always be prefered, so no base change will be suggested unless strictly necessary. -If the input record has `DnaChisel `_ +If the input record has `DNA Chisel `_ annotations such as ``@AvoidChanges`` or ``@EnforceTranslation``, these will be enforced to forbid some mutations. @@ -180,13 +180,13 @@ using this command: Contribute ! --------------- +------------ Golden Hinges is an open-source software originally written at the `Edinburgh Genome Foundry `_ by `Zulko `_ and `released on Github `_ -under the MIT licence. Everyone is welcome to contribute ! +under the MIT licence. Everyone is welcome to contribute! .. raw:: html