Skip to content

Commit

Permalink
version 0.1.5.4 Fix Fasta parser with modification (#74)
Browse files Browse the repository at this point in the history
  • Loading branch information
maksbotan authored May 16, 2024
1 parent 6717ad6 commit 6728d39
Show file tree
Hide file tree
Showing 7 changed files with 38 additions and 7 deletions.
3 changes: 3 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## [Unreleased]

## [0.1.5.4] - 2024-05-16
- Fix Fasta parser for unknown modifications at the end of a line.

## [0.1.5.3] - 2023-12-08
- Update tests and dependencies.

Expand Down
2 changes: 1 addition & 1 deletion package.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: cobot-io
version: 0.1.5.3
version: 0.1.5.4
github: "biocad/cobot-io"
license: BSD3
category: Bio
Expand Down
2 changes: 1 addition & 1 deletion src/Bio/FASTA.hs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import Bio.FASTA.Writer (WritableFastaToken (..), fastaToText)

-- | Reads a 'Fasta' from the given file.
--
-- The file contents are parsed with 'fastaP', using the base name of the path
-- ('takeBaseName') as the source name. A parse failure is reported through
-- 'fail' with the message rendered by 'errorBundlePretty'.
--
fromFile :: (MonadFail m, MonadIO m) => FilePath -> m (Fasta Char) -- signature before this commit
fromFile :: (MonadFail m, MonadIO m, ParsableFastaToken a) => FilePath -> m (Fasta a) -- signature after this commit
fromFile f = liftIO (readFile f) >>= either (fail . errorBundlePretty) pure . parse fastaP (takeBaseName f)

-- | Writes 'FastaSequence' to file.
Expand Down
8 changes: 7 additions & 1 deletion src/Bio/FASTA/Parser.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,14 @@ type Parser = Parsec Void Text
-- | Runs the given parser over in-memory text.
--
-- The source name passed to 'parse' is always @"input.fasta"@. On failure the
-- error bundle is rendered to a 'String' with 'errorBundlePretty'.
parseOnly :: Parsec Void Text a -> Text -> Either String a
parseOnly parser input =
  case parse parser "input.fasta" input of
    Left bundle  -> Left (errorBundlePretty bundle)
    Right result -> Right result

-- 'sc' uses 'hspace1' rather than 'space1' because the 'fastaLine' parser
-- expects every line to end with a line ending or with end of file. If 'sc'
-- consumed the end of line, the 'lexeme' inside 'unknownP' would consume it too,
-- so 'fastaLine' would not know that the line had ended and would expect more symbols.
--
-- 'hspace1' consumes only "horizontal" space, leaving the line ending for 'fastaLine'.
sc :: Parser ()
sc = L.space space1 empty empty -- definition before this commit
sc = L.space hspace1 empty empty -- definition after this commit

-- | Wraps a parser with 'L.lexeme', using 'sc' as the space consumer.
lexeme :: Parser a -> Parser a
lexeme p = L.lexeme sc p
Expand Down
2 changes: 2 additions & 0 deletions test/FASTA/order10.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
>mol1
[FAM]ACGT[UNK][
8 changes: 8 additions & 0 deletions test/FASTA/order9.fasta
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
>mol1
[FAM]ACGT[UNK]

>mol2
[HEX]ACCGT

>mol3
[HEX]ACGTCA[UNK]
20 changes: 16 additions & 4 deletions test/FASTASpec.hs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ import Prelude hiding (readFile, writeFile)
import System.Directory (removeFile)
import Test.Hspec

import Bio.FASTA (fastaP, fromFile, toFile)
import Bio.FASTA (ParsableFastaToken, fastaP, fromFile, toFile)
import Bio.FASTA.Parser (parseOnly)
import Bio.FASTA.Type (Fasta, FastaItem (..))
import Bio.FASTA.Type (Fasta, FastaItem (..), ModItem (..), Modification (..))
import Bio.Sequence (bareSequence)

correctFasta1 :: Fasta Char
Expand Down Expand Up @@ -45,6 +45,16 @@ badFasta7 = Left "input.fasta:2:1:\n |\n2 | 5\8217-CTTCAAGAGAGAGACCTGCGT-3\8217
badFasta8 :: Either String (Fasta Char)
badFasta8 = Left "input.fasta:21:5:\n |\n21 | CMV + enhMCK + prcTnT-2\r\n | ^^\nunexpected \"+ \"\nexpecting end of input, end of line, or letter\n"

-- | Expected parse result for @test/FASTA/order9.fasta@: three records whose
-- sequences mix plain letters with unknown modifications.
correctFasta9 :: Fasta ModItem
correctFasta9 =
  [ FastaItem "mol1" . bareSequence $ unknown "[FAM]" : letters "ACGT" ++ [unknown "[UNK]"]
  , FastaItem "mol2" . bareSequence $ unknown "[HEX]" : letters "ACCGT"
  , FastaItem "mol3" . bareSequence $ unknown "[HEX]" : letters "ACGTCA" ++ [unknown "[UNK]"]
  ]
  where
    -- An unrecognised modification token, kept verbatim (brackets included).
    unknown = Mod . Unknown
    -- A run of plain sequence letters.
    letters = map Letter

-- | Expected parse failure for @test/FASTA/order10.fasta@, whose sequence line
-- ends with an unclosed @[@.
--
-- NOTE(review): unlike 'badFasta8', this expected message contains no spaces;
-- presumably 'parseBadFile' normalises whitespace before comparing — confirm.
badFasta10 :: Either String (Fasta ModItem)
badFasta10 = Left "input.fasta:2:16:\n|\n2|[FAM]ACGT[UNK][\n|^\nunexpectednewline\nexpectingmodificationname\n"

fastaSpec :: Spec
fastaSpec = describe "Fasta files parser" $ do
describe "fromFile" $ do
Expand All @@ -56,19 +66,21 @@ fastaSpec = describe "Fasta files parser" $ do
parseBadFile "test/FASTA/order6.fasta" badFasta6
parseBadFile "test/FASTA/order7.fasta" badFasta7
parseBadFile "test/FASTA/order8.fasta" badFasta8
parseFile "test/FASTA/order9.fasta" correctFasta9
parseBadFile "test/FASTA/order10.fasta" badFasta10

describe "toFile" $ do
writeFile "test/FASTA/input.fasta" correctFasta5
writeFile "test/FASTA/input.fasta" correctFasta1
writeFile "test/FASTA/input.fasta" correctFasta3

-- | Builds a spec item that reads @path@ with 'fromFile' and expects the
-- parsed result to equal @cf@.
parseFile :: FilePath -> Fasta Char -> Spec -- signature before this commit
parseFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Fasta a -> Spec -- signature after this commit
parseFile path cf =
it ("correctly parses good fasta from file " <> path) $ do
fasta <- fromFile path
fasta `shouldBe` cf

parseBadFile :: FilePath -> Either String (Fasta Char) -> Spec
parseBadFile :: (Show a, Eq a, ParsableFastaToken a) => FilePath -> Either String (Fasta a) -> Spec
parseBadFile path cf =
it ("correctly parses bad fasta from file " <> path) $ do
res <- liftIO (readFile path)
Expand Down

0 comments on commit 6728d39

Please sign in to comment.