Skip to content

Commit

Permalink
dual barcoding improvements (#67)
Browse files Browse the repository at this point in the history
This adds a "name" field to dual barcodes, as well as a way to create dual barcode primer sets from a list of dual barcodes, rather than only from a CSV file.
  • Loading branch information
Koeng101 authored Feb 28, 2024
1 parent 025f71d commit d0607bb
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [Unreleased]
- Updated dual barcode primer sets so they can be created without CSV files [#67](https://github.com/Koeng101/dnadesign/pull/67)
- Added workers to bio as a way to process data [#62](https://github.com/Koeng101/dnadesign/pull/62)
- Improved megamash efficiency and added []Match JSON conversion [#61](https://github.com/Koeng101/dnadesign/pull/61)
- Added barcoding functionality for sequencing reads [#59](https://github.com/Koeng101/dnadesign/pull/59)
Expand Down
38 changes: 38 additions & 0 deletions lib/sequencing/barcoding/barcoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,10 +173,48 @@ type DualBarcodePrimerSet struct {

// DualBarcode contains a forward and reverse barcode.
type DualBarcode struct {
// Name labels the barcode pair. DualBarcodesToPrimerSet uses it as the key of
// BarcodeMap and as the value stored in ReverseBarcodeMap.
Name string
// Forward is the forward barcode of the pair.
Forward string
// Reverse is the reverse barcode of the pair.
Reverse string
}

// DualBarcodesToPrimerSet builds a DualBarcodePrimerSet from a list of dual
// barcodes.
//
// Each entry is registered in BarcodeMap under its Name and in
// ReverseBarcodeMap under its forward/reverse pair. The DualBarcode values
// stored in those maps carry only Forward and Reverse; Name is left empty.
// ForwardBarcodes and ReverseBarcodes hold the distinct forward and reverse
// barcodes in sorted order. When several entries share a Name or a
// forward/reverse pair, the last one in the list wins.
func DualBarcodesToPrimerSet(dualBarcodes []DualBarcode) DualBarcodePrimerSet {
	// Sets of the distinct barcodes seen in each direction.
	seenForward := make(map[string]struct{})
	seenReverse := make(map[string]struct{})

	primerSet := DualBarcodePrimerSet{
		BarcodeMap:        make(map[string]DualBarcode),
		ReverseBarcodeMap: make(map[DualBarcode]string),
	}
	for i := range dualBarcodes {
		entry := dualBarcodes[i]
		seenForward[entry.Forward] = struct{}{}
		seenReverse[entry.Reverse] = struct{}{}

		// Name is left unset here, so ReverseBarcodeMap is keyed by the
		// forward/reverse pair alone.
		pair := DualBarcode{Forward: entry.Forward, Reverse: entry.Reverse}
		primerSet.BarcodeMap[entry.Name] = pair
		primerSet.ReverseBarcodeMap[pair] = entry.Name
	}

	// Map iteration order is random, so the keys are sorted to give a
	// deterministic result.
	sortedKeys := func(set map[string]struct{}) []string {
		keys := make([]string, 0, len(set))
		for key := range set {
			keys = append(keys, key)
		}
		sort.Strings(keys)
		return keys
	}
	primerSet.ForwardBarcodes = sortedKeys(seenForward)
	primerSet.ReverseBarcodes = sortedKeys(seenReverse)

	return primerSet
}

// ParseDualPrimerSet parses a csv file into a DualBarcodePrimerSet.
func ParseDualPrimerSet(csvFile io.Reader) (DualBarcodePrimerSet, error) {
var result DualBarcodePrimerSet
Expand Down
20 changes: 15 additions & 5 deletions lib/sequencing/barcoding/barcoding_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
package barcoding
package barcoding_test

import (
_ "embed"
"fmt"
"strings"
"testing"

"github.com/koeng101/dnadesign/lib/bio"
"github.com/koeng101/dnadesign/lib/sequencing/barcoding"
"github.com/koeng101/dnadesign/lib/sequencing/nanopore"
)

Expand All @@ -32,15 +34,15 @@ ATGTTGCCTACCTACTTGGTTCAGTTACGTATTGCTAAGGTTAACACAAAGACACCCGACAACTTTCTTCAGCACCTGCC
+
%%%%%%####$%(*+,)&''));::80-''')0000BCFJOSNLQJKJKRSMSFKLSDHIMGJEHDABFCCHIJKSSISHGJMSKSSKSOLSJSJIOJMSMNRKJKSJQLISGHGIHSJHSLJKSMISJEEBBCBH>=:/---849000>@GEEEJOKJIJHGSISIOMHKKGFGFJMGHHROIPSIJSSJKSLSMMSKJSJIIIKNMMEISKMLSKSHJPHLMJNQLHGRJKKKSHKKOMISOSGFJIOSSLSOSSSSNMSIFH876'&'(....+++(%`

primerSet, _ := ParseSinglePrimerSet(strings.NewReader(primerSetCsv))
primerSet, _ := barcoding.ParseSinglePrimerSet(strings.NewReader(primerSetCsv))
parser := bio.NewFastqParser(strings.NewReader(reads))
records, _ := parser.Parse()

var barcodes []string
for _, record := range records {
// Note: Nanopore has a score that requires a lower match (~16) than the
// default ScoreMagicNumber (18).
barcode, _ := SingleBarcodeSequence(record.Sequence, primerSet)
barcode, _ := barcoding.SingleBarcodeSequence(record.Sequence, primerSet)
if barcode != "" {
barcodes = append(barcodes, barcode)
}
Expand Down Expand Up @@ -102,13 +104,13 @@ GTGTGTACTTCGTTCAGTTACGTATGCTTAGTTGTGAACACAAAGACACCGACAACTTCTTCAGCACCTTCTTGATCTTC
+
$$$%%%%&',*+,//;=:9771111,-+*&&%%%%%'.1389:987777844443553((50+,,88:;2///;6555597677764334578879999:977778888888;=;99888766534424551666688767644445776777:<;:998986652222378*9:::<;:99876322211//..,,,,-0667873333488('''122../
`
primerSet, _ := ParseDualPrimerSet(strings.NewReader(primerSetCsv))
primerSet, _ := barcoding.ParseDualPrimerSet(strings.NewReader(primerSetCsv))
parser := bio.NewFastqParser(strings.NewReader(reads))
records, _ := parser.Parse()

var wells []string
for _, record := range records[0:10] {
well, _ := DualBarcodeSequence(record.Sequence, primerSet)
well, _ := barcoding.DualBarcodeSequence(record.Sequence, primerSet)
if well != "" {
wells = append(wells, well)
}
Expand All @@ -117,3 +119,11 @@ $$$%%%%&',*+,//;=:9771111,-+*&&%%%%%'.1389:987777844443553((50+,,88:;2///;655559
fmt.Println(wells)
// Output: [B15 O1 O1 J22 C22 E20 A15]
}

// TestDualBarcodesToPrimerSet checks that a single named dual barcode shows up
// in every part of the primer set built by DualBarcodesToPrimerSet: both
// barcode slices and both name lookup maps.
func TestDualBarcodesToPrimerSet(t *testing.T) {
	dualBarcodes := []barcoding.DualBarcode{{Name: "test", Forward: "ATG", Reverse: "TAA"}}
	primerSet := barcoding.DualBarcodesToPrimerSet(dualBarcodes)

	if len(primerSet.ForwardBarcodes) != 1 || primerSet.ForwardBarcodes[0] != "ATG" {
		t.Errorf("ForwardBarcodes = %v, want [ATG]", primerSet.ForwardBarcodes)
	}
	if len(primerSet.ReverseBarcodes) != 1 || primerSet.ReverseBarcodes[0] != "TAA" {
		t.Errorf("ReverseBarcodes = %v, want [TAA]", primerSet.ReverseBarcodes)
	}

	// The maps store the pair without its Name, so the expected value and
	// lookup key set only Forward and Reverse.
	pair := barcoding.DualBarcode{Forward: "ATG", Reverse: "TAA"}
	if got, ok := primerSet.BarcodeMap["test"]; !ok || got != pair {
		t.Errorf("BarcodeMap[%q] = %+v (present: %t), want %+v", "test", got, ok, pair)
	}
	if got, ok := primerSet.ReverseBarcodeMap[pair]; !ok || got != "test" {
		t.Errorf("ReverseBarcodeMap[%+v] = %q (present: %t), want %q", pair, got, ok, "test")
	}
}

0 comments on commit d0607bb

Please sign in to comment.