Skip to content

Commit

Permalink
Fragment flanks (#100)
Browse files Browse the repository at this point in the history
This updates RecursiveFragment so that it can add flanking sequences (a forward flank and a reverse flank) to the ends of DNA fragments for recursive assembly. This feature is needed at the oligo-production level because it affects how the underlying oligos are designed.
  • Loading branch information
Koeng101 authored Oct 3, 2024
1 parent 0694d1d commit 9f69b0b
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 17 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -89,14 +89,15 @@ jobs:
fi
- name: Test wheel in fresh environment
#GitHub Actions runners are typically x86_64, even when you're building for ARM64
if: (runner.os == 'Linux' && matrix.arch == 'amd64') || (runner.os == 'macOS' && matrix.arch == runner.arch)
run: |
python -m venv test_env
source test_env/bin/activate
pip install ./py/dist/*.whl
python -c "from dnadesign import parsers; print('Library loaded successfully')"
pip install pytest
pytest ./py/tests -v --capture=no
continue-on-error: true
- name: Debug segmentation fault (macOS)
if: failure() && runner.os == 'macOS'
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ jobs:
pdoc ./py/dnadesign -o ./docs
- name: Upload artifact
uses: actions/upload-pages-artifact@v4
uses: actions/upload-pages-artifact@v2
with:
path: './docs' # Adjust this to your build output directory

Expand Down
22 changes: 16 additions & 6 deletions lib/synthesis/fragment/fragment.go
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,12 @@ type Assembly struct {
// rough... recommendation. Oftentimes the lowest level of oligo has +1 in
// order to fit the right overhangs in. This doesn't matter that much because
// the limiting factor in assemblies is typically mutation rate at that size.
func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern []int, excludeOverhangs []string, includeOverhangs []string) (Assembly, error) {
//
// forwardFlank and reverseFlank prepare the sequences for recursive
// assembly. Generally, this involves adding a fixed flanking sequence to
// each oligo, and also to the edges of each subassembly. Do not adjust
// maxCodingSizeOligo for the flank lengths yourself: the function already
// accounts for their combined length internally.
func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern []int, excludeOverhangs []string, includeOverhangs []string, forwardFlank string, reverseFlank string) (Assembly, error) {
/*
Ok, so this is a note for you hackers out there: this algorithm can be
greatly improved. The optimal way to do this would be to do a continuous
Expand Down Expand Up @@ -271,8 +276,9 @@ func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern
sequenceLen := len(sequence)

// get size pattern. This size pattern maps how we need to fragment the sequences
appendLength := len(forwardFlank) + len(reverseFlank)
sizes := make([]int, len(assemblyPattern))
maxSize := maxCodingSizeOligo * assemblyPattern[0]
maxSize := (maxCodingSizeOligo - appendLength) * assemblyPattern[0]
for i := range assemblyPattern {
if i == 0 {
sizes[i] = maxSize
Expand All @@ -281,23 +287,27 @@ func RecursiveFragment(sequence string, maxCodingSizeOligo int, assemblyPattern
sizes[i] = sizes[i-1]*assemblyPattern[i] - smallestMinFragmentSizeSubtraction // subtract approx 60bp to give room for finding overhangs
}
if sequenceLen <= sizes[0] {
fragments, efficiency, err := FragmentWithOverhangs(sequence, maxCodingSizeOligo-60, maxCodingSizeOligo, excludeOverhangs, includeOverhangs)
fragments, efficiency, err := FragmentWithOverhangs(forwardFlank+sequence+reverseFlank, maxCodingSizeOligo-60, maxCodingSizeOligo, excludeOverhangs, includeOverhangs)
if err != nil {
return assembly, err
}
return Assembly{Sequence: sequence, Fragments: fragments, Efficiency: efficiency}, nil
var fragmentsAppended []string
for _, fragment := range fragments {
fragmentsAppended = append(fragmentsAppended, forwardFlank+fragment+reverseFlank)
}
return Assembly{Sequence: sequence, Fragments: fragmentsAppended, Efficiency: efficiency}, nil
}
// After the smallest possible block, begin iterating for each size.
for i, size := range sizes[1:] {
if sequenceLen <= size {
fragments, efficiency, err := FragmentWithOverhangs(sequence, sizes[i]-minFragmentSizeSubtraction, sizes[i], excludeOverhangs, includeOverhangs)
fragments, efficiency, err := FragmentWithOverhangs(forwardFlank+sequence+reverseFlank, sizes[i]-minFragmentSizeSubtraction, sizes[i], excludeOverhangs, includeOverhangs)
if err != nil {
return assembly, err
}
// Now we need to get the derived fragments from this overall construction
var subAssemblies []Assembly
for _, fragment := range fragments {
subAssembly, err := RecursiveFragment(fragment, maxCodingSizeOligo, assemblyPattern, excludeOverhangs, includeOverhangs)
subAssembly, err := RecursiveFragment(fragment, maxCodingSizeOligo, assemblyPattern, excludeOverhangs, includeOverhangs, forwardFlank, reverseFlank)
if err != nil {
return subAssembly, err
}
Expand Down
2 changes: 1 addition & 1 deletion lib/synthesis/fragment/fragment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func TestRecursiveFragment(t *testing.T) {
gene := records[0].Sequence
maxOligoLen := 174 // for Agilent oligo pools
assemblyPattern := []int{5, 4, 4, 5} // seems reasonable enough
_, err := fragment.RecursiveFragment(gene, maxOligoLen, assemblyPattern, excludeOverhangs, defaultOverhangs)
_, err := fragment.RecursiveFragment(gene, maxOligoLen, assemblyPattern, excludeOverhangs, defaultOverhangs, "GTCTCT", "CGAG")
if err != nil {
t.Errorf("Failed to RecursiveFragment blue1. Got error: %s", err)
}
Expand Down
3 changes: 2 additions & 1 deletion py/dnadesign/definitions.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,4 +212,5 @@ FragmentSequenceResult FragmentSequenceWithOverhangs(
RecursiveFragmentSequenceResult RecursiveFragmentSequence(
char* sequence, int maxCodingSizeOligo, int* assemblyPattern,
int patternCount, char** excludeOverhangs, int excludeCount,
char** includeOverhangs, int includeCount);
char** includeOverhangs, int includeCount, char* forwardFlank,
char* reverseFlank);
8 changes: 6 additions & 2 deletions py/dnadesign/fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,15 +74,19 @@ def _assembly_from_c(c_assembly) -> Assembly:
return Assembly(sequence, fragments, efficiency, sub_assemblies)

def recursive_fragment(sequence: str, max_coding_size_oligo: int, assembly_pattern: List[int],
exclude_overhangs: List[str], include_overhangs: List[str]) -> Assembly:
exclude_overhangs: List[str], include_overhangs: List[str],
forward_flank: str, reverse_flank: str) -> Assembly:
c_sequence = ffi.new("char[]", sequence.encode('utf-8'))
c_forward_flank = ffi.new("char[]", forward_flank.encode('utf-8'))
c_reverse_flank = ffi.new("char[]", reverse_flank.encode('utf-8'))
c_assembly_pattern = ffi.new("int[]", assembly_pattern)
c_exclude_overhangs, _ = _create_c_string_array(exclude_overhangs)
c_include_overhangs, _ = _create_c_string_array(include_overhangs)

result = lib.RecursiveFragmentSequence(c_sequence, max_coding_size_oligo, c_assembly_pattern, len(assembly_pattern),
c_exclude_overhangs, len(exclude_overhangs),
c_include_overhangs, len(include_overhangs))
c_include_overhangs, len(include_overhangs),
c_forward_flank, c_reverse_flank)

if result.error != ffi.NULL:
raise Exception(ffi.string(result.error).decode('utf-8'))
Expand Down
6 changes: 4 additions & 2 deletions py/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -665,8 +665,10 @@ func FragmentSequenceWithOverhangs(sequence *C.char, minFragmentSize C.int, maxF
}

//export RecursiveFragmentSequence
func RecursiveFragmentSequence(sequence *C.char, maxCodingSizeOligo C.int, assemblyPattern *C.int, patternCount C.int, excludeOverhangs **C.char, excludeCount C.int, includeOverhangs **C.char, includeCount C.int) (*C.Assembly, *C.char) {
func RecursiveFragmentSequence(sequence *C.char, maxCodingSizeOligo C.int, assemblyPattern *C.int, patternCount C.int, excludeOverhangs **C.char, excludeCount C.int, includeOverhangs **C.char, includeCount C.int, forwardFlank *C.char, reverseFlank *C.char) (*C.Assembly, *C.char) {
goSequence := C.GoString(sequence)
goForwardFlank := C.GoString(forwardFlank)
goReverseFlank := C.GoString(reverseFlank)
goAssemblyPattern := make([]int, patternCount)
goExcludeOverhangs := make([]string, excludeCount)
goIncludeOverhangs := make([]string, includeCount)
Expand All @@ -682,7 +684,7 @@ func RecursiveFragmentSequence(sequence *C.char, maxCodingSizeOligo C.int, assem
for i := 0; i < int(includeCount); i++ {
goIncludeOverhangs[i] = C.GoString(includeSlice[i])
}
assembly, err := fragment.RecursiveFragment(goSequence, int(maxCodingSizeOligo), goAssemblyPattern, goExcludeOverhangs, goIncludeOverhangs)
assembly, err := fragment.RecursiveFragment(goSequence, int(maxCodingSizeOligo), goAssemblyPattern, goExcludeOverhangs, goIncludeOverhangs, goForwardFlank, goReverseFlank)
if err != nil {
return nil, C.CString(err.Error())
}
Expand Down
2 changes: 1 addition & 1 deletion py/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def get_shared_lib_ext():

setup(
name='dnadesign',
version='0.1.6',
version='0.1.7',
packages=find_packages(),
package_data={'dnadesign': ['definitions.h', 'libdnadesign.h', "libdnadesign" + get_shared_lib_ext()]},
install_requires=[
Expand Down
4 changes: 2 additions & 2 deletions py/tests/test_fragment.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def test_recursive_fragment():
max_oligo_len = 174 # for Agilent oligo pools
assembly_pattern = [5, 4, 4, 5] # seems reasonable enough

result = recursive_fragment(gene, max_oligo_len, assembly_pattern, exclude_overhangs, default_overhangs)
result = recursive_fragment(gene, max_oligo_len, assembly_pattern, exclude_overhangs, default_overhangs, "GTCTCT", "CGAG")
assert result is not None, "RecursiveFragment failed"
# Add more specific assertions based on the expected structure of the result
assert result.fragments == ['ATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAG', 'CCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAG']
assert result.fragments == ['GTCTCTGTCTCTATGACCATGATTACGCCAAGCTTGCATGCCTGCAGGTCGACTCTAGAGGATCCCCGGGTACCGAGCTCGAATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCTGGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCGAG', 'GTCTCTCCAGCTGGCGTAATAGCGAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGCCTGATGCGGTATTTTCTCCTTACGCATCTGTGCGGTATTTCACACCGCATATGGTGCACTCTCAGTACAATCTGCTCTGATGCCGCATAGCGAGCGAG']

0 comments on commit 9f69b0b

Please sign in to comment.