Skip to content

Commit

Permalink
#30 Add unit tests for BioCollections functions
Browse files Browse the repository at this point in the history
  • Loading branch information
kMutagene committed Jun 22, 2020
1 parent 4027420 commit 9ae84cc
Show file tree
Hide file tree
Showing 9 changed files with 377 additions and 84 deletions.
33 changes: 6 additions & 27 deletions src/BioFSharp/BioArray.fs
Original file line number Diff line number Diff line change
Expand Up @@ -40,42 +40,37 @@ module BioArray =
|> Seq.choose OptionConverter.charToOptionNucleotid
|> Seq.toArray


/// Create the reverse DNA or RNA strand. For example, the sequence "ATGC" is converted to "CGTA"
let reverse (nucs:BioArray<Nucleotides.Nucleotide>) =
let reverse (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_> =
nucs |> Array.rev

/// Create the complement DNA or cDNA (from RNA) strand. For example, the sequence "ATGC" is converted to "TACG"
let complement (nucs:BioArray<Nucleotides.Nucleotide>) =
let complement (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_>=
nucs |> Array.map Nucleotides.complement

/// Create the reverse complement strand meaning antiparallel DNA strand or the cDNA (from RNA) respectively. For example, the sequence "ATGC" is converted to "GCAT". "Antiparallel" combines the two functions "Complement" and "Inverse".
let reverseComplement (nucs:BioArray<Nucleotides.Nucleotide>) =
let reverseComplement (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_>=
nucs |> Array.map Nucleotides.complement |> Array.rev


/// Builds a new collection whose elements are the result of applying
/// the given function to each triplet of the collection.
let mapInTriplets f (input:BioArray<'a>) =
Array.init (input.Length / 3) (fun i -> f (input.[i],input.[i+1],input.[i+2]) )

let mapInTriplets mapping (input:BioArray<'a>) =
Array.init (input.Length / 3) (fun i -> mapping (input.[i * 3],input.[(i*3)+1],input.[(i*3)+2]) )

// Replace T by U
/// Transcribe a given DNA coding strand (5'-----3') by applying Nucleotides.replaceTbyU to every nucleotide.
/// Obsolete: the name contains a typo; the attribute message points callers to transcribeCodingStrand.
[<Obsolete("This function name contained a typo and will be removed in the next major release. Use transcribeCodingStrand instead.")>]
let transcribeCodeingStrand (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_> =
    Array.map Nucleotides.replaceTbyU nucs

/// Transcribe a given DNA coding strand (5'-----3')
/// Transcribe a given DNA coding strand (5'-----3')
let transcribeCodingStrand (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_> =
nucs |> Array.map (fun nuc -> Nucleotides.replaceTbyU nuc)

//
/// Transcribe a given DNA template strand (3'-----5'): every nucleotide is first
/// complemented (Nucleotides.complement) and the result is passed through Nucleotides.replaceTbyU.
let transcribeTemplateStrand (nucs:BioArray<Nucleotides.Nucleotide>) : BioArray<_> =
    let transcribe = Nucleotides.complement >> Nucleotides.replaceTbyU
    Array.map transcribe nucs


/// translates nucleotide sequence to aminoacid sequence
let translate (nucleotideOffset:int) (rnaSeq:BioArray<Nucleotides.Nucleotide>) : BioArray<_> =
if (nucleotideOffset < 0) then
Expand All @@ -84,43 +79,33 @@ module BioArray =
|> Array.skip nucleotideOffset
|> mapInTriplets Nucleotides.lookupBytes


/// Compares the elements of two biosequences pairwise.
/// Note that the result is an int, not a bool: 0 means both arrays have the same
/// length and all elements are pairwise equal; any non-zero value means they differ.
let isEqual a b =
    // The comparer only tells "equal" (0) from "different" (1); it does not define an ordering.
    let elementDiff x y = if x <> y then 1 else 0
    Array.compareWith elementDiff a b




/// Returns the one-letter-code string of the sequence, built from the symbol (BioItem.symbol) of each item
let toString (bs:BioArray<#IBioItem>) =
    let symbols = Array.map BioItem.symbol bs
    new string (symbols)



/// Returns the monoisotopic mass of the given sequence, i.e. the sum of BioItem.monoisoMass over all items
let toMonoisotopicMass (bs:BioArray<#IBioItem>) =
    bs |> Array.sumBy (fun item -> BioItem.monoisoMass item)


/// Returns the average mass of the given sequence, i.e. the sum of BioItem.averageMass over all items
let toAverageMass (bs:BioArray<#IBioItem>) =
    bs |> Array.sumBy (fun item -> BioItem.averageMass item)


/// Returns the monoisotopic mass of the given sequence added onto an initial value (e.g. H2O).
/// Items are accumulated left to right, starting from `state`.
let toMonoisotopicMassWith (state) (bs:BioArray<#IBioItem>) =
    let mutable total = state
    for item in bs do
        total <- total + BioItem.monoisoMass item
    total


/// Returns the average mass of the given sequence added onto an initial value (e.g. H2O).
/// Items are accumulated left to right, starting from `state`.
let toAverageMassWith (state) (bs:BioArray<#IBioItem>) =
    let mutable total = state
    for item in bs do
        total <- total + BioItem.averageMass item
    total


/// Returns a function to calculate the monoisotopic mass of the given sequence !memoization
let initMonoisoMass<'a when 'a :> IBioItem> : (BioArray<_> -> float) =
let memMonoisoMass =
Expand All @@ -129,7 +114,6 @@ module BioArray =
bs
|> Array.sumBy memMonoisoMass)


/// Returns a function to calculate the average mass of the given sequence !memoization
let initAverageMass<'a when 'a :> IBioItem> : (BioArray<_> -> float) =
let memAverageMass =
Expand All @@ -138,23 +122,20 @@ module BioArray =
bs
|> Array.sumBy memAverageMass)


/// Returns a function to calculate the monoisotopic mass of the given sequence and initial value (e.g. H2O) !memoization
/// The memoized per-item lookup is built once per call of this function, before the
/// closure is returned, so every use of the returned function shares that lookup.
let initMonoisoMassWith<'a when 'a :> IBioItem> (state:float) : (BioArray<_> -> float) =
    let cachedMass = Memoization.memoizeP (BioItem.formula >> Formula.monoisoMass)
    let addMass (total:float) item = total + cachedMass item
    fun items -> Array.fold addMass state items


/// Returns a function to calculate the average mass of the given sequence and initial value (e.g. H2O) !memoization
/// The memoized per-item lookup is built once per call of this function, before the
/// closure is returned, so every use of the returned function shares that lookup.
let initAverageMassWith<'a when 'a :> IBioItem> (state:float) : (BioArray<_> -> float) =
    let cachedMass = Memoization.memoizeP (BioItem.formula >> Formula.averageMass)
    let addMass (total:float) item = total + cachedMass item
    fun items -> Array.fold addMass state items


///Creates an array with information about the abundances of the distinct BioItems by converting the symbol of the BioItem to an integer and incrementing the count at that index. To decrease the size of the resulting array while still having fast performance, all indices are shifted by 65. Therefore, to look up the abundance of a given BioItem, use "Resultcompositionvector.[(BioItem.symbol bioitem) - 65]"
let toCompositionVector (input:BioArray<_>) =
let compVec = Array.zeroCreate 26
Expand All @@ -164,14 +145,12 @@ module BioArray =
if index >= 0 then compVec.[index] <- compVec.[index] + 1)
compVec


///Creates an array with the relative abundances of the distinct BioItems: the composition vector of the input (toCompositionVector) with every count divided by the sum of all counts. The result uses the same indexing as toCompositionVector.
///If the counts sum to 0, every entry is NaN (0.0 / 0.0).
let toRelCompositionVector (input:BioArray<_>) =
    let counts = toCompositionVector input
    let total = float (Array.sum counts)
    counts |> Array.map (fun count -> float count / total)


let initSampleBy (rnd:System.Random) (compositionVector:int[]) =
if compositionVector.Length < 26 then failwith "Amino acid composition vector must have length 26 "
let normalize (arr:int[]) =
Expand Down
49 changes: 13 additions & 36 deletions src/BioFSharp/BioList.fs
Original file line number Diff line number Diff line change
Expand Up @@ -43,58 +43,45 @@ module BioList =
|> Seq.choose OptionConverter.charToOptionNucleotid
|> Seq.toList


/// Create the reverse DNA or RNA strand. For example, the sequence "ATGC" is converted to "CGTA"
let reverse (nucs:BioList<Nucleotides.Nucleotide>) =
let reverse (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.rev

/// Create the complement DNA or cDNA (from RNA) strand. For example, the sequence "ATGC" is converted to "TACG"
let complement (nucs:BioList<Nucleotides.Nucleotide>) =
let complement (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.map Nucleotides.complement

/// Create the reverse complement strand meaning antiparallel DNA strand or the cDNA (from RNA) respectively. For example, the sequence "ATGC" is converted to "GCAT". "Antiparallel" combines the two functions "Complement" and "Inverse".
let reverseComplement (nucs:BioList<Nucleotides.Nucleotide>) =
let reverseComplement (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.map Nucleotides.complement |> List.rev

// /// Builts a new collection whose elements are the result of applying
// /// the given function to each triplet of the collection.
// let mapInTriplets f (input:seq<'a>) =
// let sourceIsEmpty = ref false
// seq { use en = input.GetEnumerator()
// while not(!sourceIsEmpty) do
// match en with
// | Triplet t -> yield (f t)
// | _ -> sourceIsEmpty := true
// }

/// Builds a new collection whose elements are the result of applying
/// the given function to each triplet of the collection.
/// Triplets are consecutive and non-overlapping; a trailing remainder of one or
/// two elements is ignored. `mapping` is called in order, from the first triplet to the last.
let mapInTriplets mapping (input:BioList<'a>) =
    // Indexing (input.[i]) and Length on an F# list are O(n), so addressing each
    // triplet by index makes the whole traversal quadratic. Walking the list once
    // with pattern matching yields the same result in linear time.
    let rec loop acc remaining =
        match remaining with
        | first :: second :: third :: rest -> loop (mapping (first, second, third) :: acc) rest
        | _ -> List.rev acc
    loop [] input

// Replace T by U
/// Transcribe a given DNA coding strand (5'-----3')
[<Obsolete("This function name contained a typo and will be removed in the next major release. Use transcribeCodingStrand instead.")>]
let transcribeCodeingStrand (nucs:BioList<Nucleotides.Nucleotide>) =
let transcribeCodeingStrand (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.map (fun nuc -> Nucleotides.replaceTbyU nuc)

/// Transcribe a given DNA coding strand (5'-----3')
let transcribeCodingStrand (nucs:BioList<Nucleotides.Nucleotide>) =
let transcribeCodingStrand (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.map (fun nuc -> Nucleotides.replaceTbyU nuc)



//
/// Transcribe a given DNA template strand (3'-----5')
let transcribeTemplateStrand (nucs:BioList<Nucleotides.Nucleotide>) =
let transcribeTemplateStrand (nucs:BioList<Nucleotides.Nucleotide>) : BioList<_> =
nucs |> List.map (fun nuc -> Nucleotides.replaceTbyU (Nucleotides.complement nuc))


/// translates nucleotide sequence to aminoacid sequence
let translate (nucleotideOffset:int) (rnaSeq:BioList<Nucleotides.Nucleotide>) =
let translate (nucleotideOffset:int) (rnaSeq:BioList<Nucleotides.Nucleotide>) : BioList<_> =
if (nucleotideOffset < 0) then
raise (System.ArgumentException(sprintf "Input error: nucleotide offset of %i is invalid" nucleotideOffset))
rnaSeq
|> List.skip nucleotideOffset
// TODO:
//|> mapInTriplets Nucleotides.lookupBytes
|> mapInTriplets Nucleotides.lookupBytes


/// Compares the elements of two sequences
let isEqual a b =
List.compareWith
Expand All @@ -106,33 +93,26 @@ module BioList =
let toString (bs:BioList<_>) =
new string (bs |> List.map BioItem.symbol |> List.toArray)



/// Returns the formula of the sequence: the formula of every item (BioItem.formula)
/// is added via Formula.add, starting from Formula.emptyFormula
let toFormula (bs:BioList<#IBioItem>) =
    let mutable total = Formula.emptyFormula
    for item in bs do
        total <- Formula.add total (BioItem.formula item)
    total


/// Returns the monoisotopic mass of the given sequence, i.e. the sum of BioItem.monoisoMass over all items
let toMonoisotopicMass (bs:BioList<#IBioItem>) =
    bs |> List.sumBy (fun item -> BioItem.monoisoMass item)


/// Returns the average mass of the given sequence, i.e. the sum of BioItem.averageMass over all items
let toAverageMass (bs:BioList<#IBioItem>) =
    bs |> List.sumBy (fun item -> BioItem.averageMass item)


/// Returns the monoisotopic mass of the given sequence added onto an initial value (e.g. H2O).
/// Items are accumulated left to right, starting from `state`.
let toMonoisotopicMassWith (state) (bs:BioList<#IBioItem>) =
    let mutable total = state
    for item in bs do
        total <- total + BioItem.monoisoMass item
    total


/// Returns the average mass of the given sequence added onto an initial value (e.g. H2O).
/// Items are accumulated left to right, starting from `state`.
let toAverageMassWith (state) (bs:BioList<#IBioItem>) =
    let mutable total = state
    for item in bs do
        total <- total + BioItem.averageMass item
    total


/// Returns a function to calculate the monoisotopic mass of the given sequence !memoization
let initMonoisoMass<'a when 'a :> IBioItem> : (BioList<'a> -> float) =
let memMonoisoMass =
Expand All @@ -141,7 +121,6 @@ module BioList =
bs
|> List.sumBy memMonoisoMass)


/// Returns a function to calculate the average mass of the given sequence !memoization
let initAverageMass<'a when 'a :> IBioItem> : (BioList<'a> -> float) =
let memAverageMass =
Expand All @@ -150,15 +129,13 @@ module BioList =
bs
|> List.sumBy memAverageMass)


/// Returns a function to calculate the monoisotopic mass of the given sequence and initial value (e.g. H2O) !memoization
/// The memoized per-item lookup is built once per call of this function, before the
/// closure is returned, so every use of the returned function shares that lookup.
let initMonoisoMassWith<'a when 'a :> IBioItem> (state:float) : (BioList<'a> -> float) =
    let cachedMass = Memoization.memoizeP (BioItem.formula >> Formula.monoisoMass)
    let addMass (total:float) item = total + cachedMass item
    fun items -> List.fold addMass state items


/// Returns a function to calculate the average mass of the given sequence and initial value (e.g. H2O) !memoization
let initAverageMassWith<'a when 'a :> IBioItem> (state:float) : (BioList<'a> -> float) =
let memAverageMass =
Expand Down
16 changes: 0 additions & 16 deletions src/BioFSharp/BioSeq.fs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ module BioSeq =
s
|> Seq.choose converter


/// Generates AminoAcid sequence of one-letter-code raw string
let ofAminoAcidString (s:#seq<char>) : BioSeq<_> =
s
Expand Down Expand Up @@ -47,7 +46,6 @@ module BioSeq =
None
else
None

else
None

Expand All @@ -62,7 +60,6 @@ module BioSeq =
| _ -> sourceIsEmpty := true
}


/// Create the reverse DNA or RNA strand by reversing the element order. For example, the sequence "ATGC" is converted to "CGTA"
let reverse (nucs:seq<Nucleotides.Nucleotide>) : BioSeq<_> =
    Seq.rev nucs
Expand All @@ -80,12 +77,10 @@ module BioSeq =
let transcribeCodingStrand (nucs:seq<Nucleotides.Nucleotide>) : BioSeq<_> =
nucs |> Seq.map (fun nuc -> Nucleotides.replaceTbyU nuc)

//
/// Transcribe a given DNA template strand (3'-----5'): every nucleotide is first
/// complemented (Nucleotides.complement) and the result is passed through Nucleotides.replaceTbyU.
let transcribeTemplateStrand (nucs:seq<Nucleotides.Nucleotide>) : BioSeq<_> =
    let transcribe = Nucleotides.complement >> Nucleotides.replaceTbyU
    Seq.map transcribe nucs


/// translates nucleotide sequence to aminoacid sequence
let translate (nucleotideOffset:int) (rnaSeq:seq<Nucleotides.Nucleotide>) : BioSeq<_> =
if (nucleotideOffset < 0) then
Expand All @@ -94,23 +89,17 @@ module BioSeq =
|> Seq.skip nucleotideOffset
|> mapInTriplets Nucleotides.lookupBytes


/// Compares the elements of two sequences pairwise.
/// Note that the result is an int, not a bool: 0 means both sequences have the same
/// length and all elements are pairwise equal; any non-zero value means they differ.
let isEqual a b =
    // The comparer only tells "equal" (0) from "different" (1); it does not define an ordering.
    let elementDiff x y = if x <> y then 1 else 0
    Seq.compareWith elementDiff a b




/// Returns the one-letter-code string of the sequence, built from the symbol (BioItem.symbol) of each item
let toString (bs:seq<#IBioItem>) =
    let symbols = bs |> Seq.map (fun item -> BioItem.symbol item) |> Seq.toArray
    new string (symbols)



/// Returns the formula of the sequence: the formula of every item (BioItem.formula)
/// is added via Formula.add, starting from Formula.emptyFormula
let toFormula (bs:seq<#IBioItem>) =
    let mutable total = Formula.emptyFormula
    for item in bs do
        total <- Formula.add total (BioItem.formula item)
    total
Expand All @@ -125,7 +114,6 @@ module BioSeq =
let toAverageMass (bs:seq<#IBioItem>) =
bs |> Seq.sumBy BioItem.averageMass


/// Returns the monoisotopic mass of the given sequence added onto an initial value (e.g. H2O).
/// The sequence is enumerated once; items are accumulated in order, starting from `state`.
let toMonoisotopicMassWith (state) (bs:seq<#IBioItem>) =
    let mutable total = state
    for item in bs do
        total <- total + BioItem.monoisoMass item
    total
Expand All @@ -135,7 +123,6 @@ module BioSeq =
let toAverageMassWith (state) (bs:seq<#IBioItem>) =
bs |> Seq.fold (fun massAcc item -> massAcc + BioItem.averageMass item) state


/// Returns a function to calculate the monoisotopic mass of the given sequence !memoization
let initMonoisoMass<'a when 'a :> IBioItem> : (seq<'a> -> float) =
let memMonoisoMass =
Expand All @@ -144,7 +131,6 @@ module BioSeq =
bs
|> Seq.sumBy memMonoisoMass)


/// Returns a function to calculate the average mass of the given sequence !memoization
let initAverageMass<'a when 'a :> IBioItem> : (seq<'a> -> float) =
let memAverageMass =
Expand All @@ -153,15 +139,13 @@ module BioSeq =
bs
|> Seq.sumBy memAverageMass)


/// Returns a function to calculate the monoisotopic mass of the given sequence and initial value (e.g. H2O) !memoization
/// The memoized per-item lookup is built once per call of this function, before the
/// closure is returned, so every use of the returned function shares that lookup.
let initMonoisoMassWith<'a when 'a :> IBioItem> (state:float) : (seq<'a> -> float) =
    let cachedMass = Memoization.memoizeP (BioItem.formula >> Formula.monoisoMass)
    let addMass (total:float) item = total + cachedMass item
    fun items -> Seq.fold addMass state items


/// Returns a function to calculate the average mass of the given sequence and initial value (e.g. H2O) !memoization
let initAverageMassWith<'a when 'a :> IBioItem> (state:float) : (seq<'a> -> float) =
let memAverageMass =
Expand Down
18 changes: 17 additions & 1 deletion src/BioFSharp/Playground/BioArray.fsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,20 @@
#load "../BioSeq.fs"
#load "../BioArray.fs"

open BioFSharp.BioArray
open BioFSharp

// Scratch check for mapInTriplets: the input literal has 32 characters, so the
// trailing "TT" cannot form a complete triplet.
// NOTE(review): presumably the incomplete remainder is dropped — confirm by evaluating in FSI.
"AUGGUACUGACGAUUUAUCCUGACGAACUCTT"
|> BioArray.ofNucleotideString
|> BioArray.mapInTriplets id

// Translate a 30-character nucleotide string, starting at nucleotide offset 0.
let a =
"AUGGUACUGACGAUUUAUCCUGACGAACUC"
|> BioArray.ofNucleotideString
|> BioArray.translate 0

// Same literal and same pipeline as `a`.
let b =
"AUGGUACUGACGAUUUAUCCUGACGAACUC"
|> BioArray.ofNucleotideString
|> BioArray.translate 0

// Compares the two results, which were built from identical expressions.
a = b
Loading

0 comments on commit 9ae84cc

Please sign in to comment.