diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index b3563e20..9f86a59f 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -25,7 +25,7 @@ jobs: with: target: stack:poseidon-tools-tests - name: Upload coverage to codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} # set in organization settings # render and deploy haddock documentation diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e1a5706..5a0566ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,9 +1,19 @@ - V 1.5.9.0: - Added a feature to list bibliography information via `trident list --bibliography`. - Added a new Server API `/bibliography` to serve bibliography information via HTTP. +- V 1.5.7.4: + - Fixed a bug that broke the long-form genotype data input option (with `--genoFile + --snpFile + ...`). +- V 1.5.7.3: + - Allowed `0` in the `Nr_SNPs` .janno column. +- V 1.5.7.2: + - Fixed a bug introduced in Version 1.5.5.0, where input using option "-p" (for example in init) would not behave correctly if input files have multiple file endings, separated by dots. +- V 1.5.7.1: + - Fixed a bug in the .janno reading triggered by trailing `à` characters and caused by premature whitespace trimming. + - Removed the hacky `removeNoBreakSpace` function from the .janno reading pipeline. It is not necessary any more. + - Added a golden test that ensures both changes perform as expected. - V 1.5.7.0: - Added support for VCF files (Variant Call Format) in Janno-packages. - - restructured test package structure, affecting some of the unit- and golden tests. + - Restructured test package structure, affecting some of the unit- and golden tests. - V 1.5.6.0: - Introduced individual `Janno...` types for every .janno column (except Poseidon_ID) in a new module `ColumnTypes`. This was done to improve .janno validation error messages. 
- Defined a typeclass `Makeable` with a function `make` to write smart constructors for the column types. diff --git a/CHANGELOGRELEASE.md b/CHANGELOGRELEASE.md index daca83e9..23525a2f 100644 --- a/CHANGELOGRELEASE.md +++ b/CHANGELOGRELEASE.md @@ -1,5 +1,18 @@ +### V 1.5.7.3 + +This patch release fixes three minor bugs, some of which were accidentally introduced with the big changes in v1.5.7.0. + +1. Fixed a bug in the .janno reading triggered by trailing `à` characters. +2. Reverted unspecified behaviour: `0` is again allowed in the `Nr_SNPs` .janno column. +3. Fixed a bug introduced in v1.5.5.0, where command line input using the `-p` option would not behave correctly if the input files have multiple file endings, separated by dots. + ### V 1.5.7.0 +> [!WARNING] +> On 2024/11/06 we realized that this release includes a breaking change that is not documented below. +> The command line input interface for unpackaged genotype data was modified from previously `--inFormat EIGENSTRAT|PLINK + --genoFile + --snpFile + --indFile` to now `--genoFile + --snpFile + --indFile` and `--bedFile + --bimFile + --famFile`. So the format selection with the `--inFormat` argument was removed and replaced with separate file selectors for EIGENSTRAT and PLINK data. +> This affects all `trident` subcommands that allow reading of unpackaged genotype data, namely `init`, `forge`, `genoconvert` and `validate`. + This release further improves `.janno` parsing error messages and adds reading support for gzipped PLINK (`.bed` and `.bim`) and EIGENSTRAT (`.geno` and `.snp`) files. We also added (experimental) support for reading VCF files. 
#### Better .janno error messages diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs index 700e8115..f4a1fc58 100644 --- a/src/Poseidon/CLI/OptparseApplicativeParsers.hs +++ b/src/Poseidon/CLI/OptparseApplicativeParsers.hs @@ -37,8 +37,8 @@ import Data.List.Split (splitOn) import Data.Version (Version) import qualified Options.Applicative as OP import SequenceFormats.Plink (PlinkPopNameMode (PlinkPopNameAsBoth, PlinkPopNameAsFamily, PlinkPopNameAsPhenotype)) -import System.FilePath (dropExtensions, takeExtensions, - (<.>)) +import System.FilePath (splitExtension, splitExtensions, + takeExtension, (<.>)) import qualified Text.Parsec as P import Text.Read (readMaybe) @@ -454,7 +454,9 @@ parseInGenoOne = OP.option (OP.eitherReader readGenoInput) ( \For VCF please see option --vcfFile") where readGenoInput :: FilePath -> Either String GenotypeFileSpec - readGenoInput p = makeGenoInput (dropExtensions p) (takeExtensions p) + readGenoInput p = + let (path, extension) = splitExtensionsOptGz p + in makeGenoInput path extension makeGenoInput path ext | ext `elem` [".geno", ".snp", ".ind"] = Right $ GenotypeEigenstrat (path <.> ".geno") Nothing @@ -474,6 +476,25 @@ parseInGenoOne = OP.option (OP.eitherReader readGenoInput) ( (path <.> ".fam") Nothing | otherwise = Left $ "unknown file extension: " ++ ext +-- a "smarter" version of `takeExtensions` and `dropExtensions`, which splits a filepath into (path, extension) at two extensions +-- if the last one is ".gz" but otherwise splits at only one. +-- This is important because users may submit files with multiple dots in their name, in which case `takeExtensions` would return +-- more than we need and the file-ending checks and classifiers would erroneously fail. +splitExtensionsOptGz :: FilePath -> (FilePath, String) +splitExtensionsOptGz fp = + if takeExtension fp /= ".gz" then -- if the file doesn't end with gz, split at a single ending + splitExtension fp + else -- if the file ends with ".gz" ... 
+ let (path, allExtensions) = splitExtensions fp + extensionsList = drop 1 . splitOn "." $ allExtensions + in case extensionsList of + ["gz"] -> splitExtension fp -- ... and .gz is the only ending, split there! + [_, "gz"] -> splitExtensions fp -- ... and there are two endings with gz, use the default splitExtensions function + _ -> -- otherwise split at two endings from the end: + let doubleExtension = ("." ++) . intercalate "." . reverse . take 2 . reverse $ extensionsList + extendedPath = path ++ "." ++ (intercalate "." . reverse . drop 2 . reverse $ extensionsList) + in (extendedPath, doubleExtension) + parseInGenoSep :: OP.Parser GenotypeFileSpec parseInGenoSep = parseEigenstrat <|> parsePlink <|> parseVCF where @@ -484,7 +505,7 @@ parseInGenoSep = parseEigenstrat <|> parsePlink <|> parseVCF pure Nothing <*> parseFileWithEndings "Eigenstrat individual file" "indFile" [".ind"] <*> pure Nothing - parsePlink = GenotypeEigenstrat <$> + parsePlink = GenotypePlink <$> parseFileWithEndings "Plink genotype matrix, optionally gzipped" "bedFile" [".bed", ".bed.gz"] <*> pure Nothing <*> parseFileWithEndings "Plink snp positions file, optionally gzipped" "bimFile" [".bim", ".bim.gz"] <*> @@ -501,7 +522,9 @@ parseFileWithEndings help long endings = OP.option (OP.maybeReader fileEndingRea OP.metavar "FILE") where fileEndingReader :: String -> Maybe FilePath - fileEndingReader optString = if takeExtensions optString `elem` endings then Just (dropExtensions optString) else Nothing + fileEndingReader p = + let (_, extension) = splitExtensionsOptGz p + in if extension `elem` endings then Just p else Nothing parseGenotypeSNPSet :: OP.Parser SNPSetSpec parseGenotypeSNPSet = OP.option (OP.eitherReader readSnpSet) ( diff --git a/src/Poseidon/ColumnTypes.hs b/src/Poseidon/ColumnTypes.hs index f9d1815f..8d0f7bdc 100644 --- a/src/Poseidon/ColumnTypes.hs +++ b/src/Poseidon/ColumnTypes.hs @@ -436,8 +436,8 @@ instance Makeable JannoNrSNPs where case T.signed T.decimal x of Left e -> fail 
$ "Nr_SNPs can not be converted to Int because " ++ e Right (num, "") -> - if num < 1 - then fail $ "Nr_SNPs " ++ show x ++ " lower than 1, which is not meaningful." + if num < 0 + then fail $ "Nr_SNPs " ++ show x ++ " lower than 0, which is not meaningful." else pure $ JannoNrSNPs num Right (_, rest) -> fail $ "Nr_SNPs can not be converted to Int, because of a trailing " ++ show rest instance Show JannoNrSNPs where show (JannoNrSNPs x) = show x diff --git a/src/Poseidon/ColumnTypesUtils.hs b/src/Poseidon/ColumnTypesUtils.hs index 6ae5886b..c0738055 100644 --- a/src/Poseidon/ColumnTypesUtils.hs +++ b/src/Poseidon/ColumnTypesUtils.hs @@ -20,7 +20,7 @@ class Makeable a where parseTypeCSV :: forall a m. (MonadFail m, Makeable a, Typeable a) => String -> S.ByteString -> m a parseTypeCSV colname x = case T.decodeUtf8' x of Left e -> fail $ show e ++ " in column " ++ colname - Right t -> make t + Right t -> make $ T.strip t -- template haskell function to generate repetitive instances makeInstances :: Name -> String -> DecsQ diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs index 8f6057fd..a4772ba5 100644 --- a/src/Poseidon/Janno.hs +++ b/src/Poseidon/Janno.hs @@ -58,7 +58,7 @@ import qualified Control.Monad.Writer as W import Data.Bifunctor (second) import qualified Data.ByteString.Char8 as Bchs import qualified Data.ByteString.Lazy.Char8 as Bch -import Data.Char (chr, isSpace, ord) +import Data.Char (chr, ord) import qualified Data.Csv as Csv import Data.Either (lefts, rights) import qualified Data.HashMap.Strict as HM @@ -275,31 +275,8 @@ filterLookupOptional m name = maybe (pure Nothing) Csv.parseField . cleanInput $ cleanInput :: Maybe Bchs.ByteString -> Maybe Bchs.ByteString cleanInput Nothing = Nothing -cleanInput (Just rawInputBS) = transNA $ trimWS . removeNoBreakSpace $ rawInputBS +cleanInput (Just rawInputBS) = transNA rawInputBS where - trimWS :: Bchs.ByteString -> Bchs.ByteString - trimWS = Bchs.dropWhile isSpace . 
Bchs.dropWhileEnd isSpace - removeNoBreakSpace :: Bchs.ByteString -> Bchs.ByteString - removeNoBreakSpace x - | not $ Bchs.isInfixOf "\194\160" x = x - | otherwise = removeNoBreakSpace $ (\(a,b) -> a <> Bchs.drop 2 b) $ Bchs.breakSubstring "\194\160" x - -- When a unicode string with the No-Break Space character is loaded, parsed - -- and written by cassava (in encodeByNameWith) it is unexpectedly expanded: - -- "MAMS-47224\194\160" becomes "MAMS-47224\195\130\194\160 - -- This was surprisingly hard to fix. We decided to remove No-Break Space chars - -- entirely before parsing them. - -- Here are some resources to see, which unicode characters are actually in a string: - -- https://www.soscisurvey.de/tools/view-chars.php - -- https://qaz.wtf/u/show.cgi - -- https://onlineunicodetools.com/convert-unicode-to-bytes - -- The following code removes the characters \194 and \160 independently. - -- This breaks other unicode characters and therefore does not solve the problem - --Bchs.filter (\y -> y /= '\194' && y /= '\160') x -- \160 is No-Break Space - -- The following code allows to debug the issue more precisely - --let !a = unsafePerformIO $ putStrLn $ show x - -- b = ... 
- -- !c = unsafePerformIO $ putStrLn $ show b - --in b transNA :: Bchs.ByteString -> Maybe Bchs.ByteString transNA "" = Nothing transNA "n/a" = Nothing diff --git a/test/Poseidon/InterfaceSpec.hs b/test/Poseidon/InterfaceSpec.hs index a493b213..9b8c60f7 100644 --- a/test/Poseidon/InterfaceSpec.hs +++ b/test/Poseidon/InterfaceSpec.hs @@ -9,6 +9,8 @@ import Test.Hspec spec :: Spec spec = do testParseInGenoOne + testParseInGenoSep + testSplitExtensionsOptGz runParser :: OP.Parser a -> [String] -> Maybe a runParser p s = OP.getParseResult $ OP.execParserPure OP.defaultPrefs (OP.info p mempty) s @@ -28,3 +30,45 @@ testParseInGenoOne = describe "path/to/file.bim.gz" Nothing "path/to/file.fam" Nothing) +testParseInGenoSep :: Spec +testParseInGenoSep = describe + "Poseidon.OptparseApplicativeParsers.parseInGenoSep" $ do + it "should return the expected paths for EIGENSTRAT data" $ do + runParser parseInGenoSep [ + "--genoFile", "path/to/file.test.geno.gz" + , "--snpFile", "path/to/file.snp" + , "--indFile", "path/to/file.ind" + ] `shouldBe` + Just (GenotypeEigenstrat "path/to/file.test.geno.gz" Nothing + "path/to/file.snp" Nothing + "path/to/file.ind" Nothing) + it "should return the expected paths for PLINK data" $ do + runParser parseInGenoSep [ + "--bedFile", "path/to/file.test.bed.gz" + , "--bimFile", "path/to/file.bim" + , "--famFile", "path/to/file.fam" + ] `shouldBe` + Just (GenotypePlink "path/to/file.test.bed.gz" Nothing + "path/to/file.bim" Nothing + "path/to/file.fam" Nothing) + it "should return the expected paths for VCF data" $ do + runParser parseInGenoSep [ + "--vcfFile", "path/to/file.vcf" + ] `shouldBe` + Just (GenotypeVCF "path/to/file.vcf" Nothing) + +testSplitExtensionsOptGz :: Spec +testSplitExtensionsOptGz = describe + "Poseidon.OptparseApplicativeParsers.splitExtensionsOptGz" $ do + it "should split with empty ending" $ + splitExtensionsOptGz "myFile_noEnding" `shouldBe` ("myFile_noEnding", "") + it "should return a single extension if not gz" $ 
+ splitExtensionsOptGz "myFile.txt" `shouldBe` ("myFile", ".txt") + it "...even if there are more dots" $ + splitExtensionsOptGz "myFile.double.txt" `shouldBe` ("myFile.double", ".txt") + it "should return only gz if that's the only ending" $ + splitExtensionsOptGz "myFile.gz" `shouldBe` ("myFile", ".gz") + it "should return two endings if ending with gz" $ + splitExtensionsOptGz "myFile.txt.gz" `shouldBe` ("myFile", ".txt.gz") + it "even if there are more" $ + splitExtensionsOptGz "myFile.double.txt.gz" `shouldBe` ("myFile.double", ".txt.gz") diff --git a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt index bad7c07a..dde5af12 100644 --- a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt +++ b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt @@ -118,6 +118,8 @@ ad7e56177aad0a720f0bde13d47f2ac1 forge forge/ForgePac19/CHANGELOG.md 8538ffd971ebb12cf5ef6e338da27970 forge forge/ForgePac19/ForgePac19.bim 9b22cab26cdae87bd79d24c289e48433 forge forge/ForgePac19/ForgePac19.fam b7b649620cd37bd4a6d6f0f31c1c56da forge forge/ForgePac19/ForgePac19.janno +b36b3ca509c235d0f15571c96195e801 forge forge/ForgePac20/POSEIDON.yml +e375863bca9e4a91c9855396abde31c7 forge forge/ForgePac20/ForgePac20.janno d4a05cfef045648238a94a9d621cf667 chronicle chronicle/chronicle1.yml b43da4d5734371c0648553120f812466 timetravel timetravel/Lamnidis_2018-1.0.0/POSEIDON.yml 8d57ce1a1ab28c0d8a5f391dd790a59c timetravel timetravel/Lamnidis_2018-1.0.1/POSEIDON.yml diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml index f4402da5..7d3a5ce9 100644 --- a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml +++ b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml @@ -1,29 +1,29 @@ title: Chronicle title description: Chronicle description chronicleVersion: 0.2.0 -lastModified: 2024-10-25 +lastModified: 2024-10-31 packages: - title: 
Lamnidis_2018 version: 1.0.0 - commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227 + commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb path: Lamnidis_2018 - title: Lamnidis_2018 version: 1.0.1 - commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227 + commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb path: Lamnidis_2018_newVersion - title: Schiffels version: 1.1.1 - commit: b5ecaa05ecbdfa85bd8fa3646e1680e0c88a4020 + commit: c31c1842f859c42a35713c227c0ba589689d31aa path: Schiffels - title: Schiffels_2016 version: 1.0.1 - commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227 + commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb path: Schiffels_2016 - title: Schmid_2028 version: 1.0.0 - commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227 + commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb path: Schmid_2028 - title: Wang_2020 version: 0.1.0 - commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227 + commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb path: Wang_2020 diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bed b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bed new file mode 100644 index 00000000..67beaf48 --- /dev/null +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bed @@ -0,0 +1 @@ +l \ No newline at end of file diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bim b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bim new file mode 100644 index 00000000..e0296974 --- /dev/null +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bim @@ -0,0 +1,9 @@ +1 1_752566 2.013e-2 752566 G A +1 1_842013 2.2518e-2 842013 T G +1 1_891021 2.4116e-2 891021 G A +1 1_949654 2.5727e-2 949654 A G +2 2_1018704 2.6288e-2 1018704 A G +2 2_1045331 2.6665e-2 1045331 G A +2 2_1048955 2.6674e-2 1048955 A G +2 2_1061166 2.6711e-2 1061166 T C +2 2_1108637 2.8311e-2 1108637 G A diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.fam 
b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.fam new file mode 100644 index 00000000..8081f8b5 --- /dev/null +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.fam @@ -0,0 +1,2 @@ +POP1 XXX001 0 0 2 0 +POP2 XXX002 0 0 2 0 diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.janno b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.janno new file mode 100644 index 00000000..06214078 --- /dev/null +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.janno @@ -0,0 +1,3 @@ +Poseidon_ID Genetic_Sex Group_Name Alternative_IDs Relation_To Relation_Degree Relation_Type Relation_Note Collection_ID Country Country_ISO Location Site Latitude Longitude Date_Type Date_C14_Labnr Date_C14_Uncal_BP Date_C14_Uncal_BP_Err Date_BC_AD_Start Date_BC_AD_Median Date_BC_AD_Stop Date_Note MT_Haplogroup Y_Haplogroup Source_Tissue Nr_Libraries Library_Names Capture_Type UDG Library_Built Genotype_Ploidy Data_Preparation_Pipeline_URL Endogenous Nr_SNPs Coverage_on_Target_SNPs Damage Contamination Contamination_Err Contamination_Meas Contamination_Note Genetic_Source_Accession_IDs Primary_Contact Publication Note Keywords +XXX001 F POP1 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a testà n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 9 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a +XXX002 F POP2 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a MAMS-47224 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a 9 n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a n/a diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/POSEIDON.yml new file mode 100644 index 00000000..77f66204 --- /dev/null +++ b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/POSEIDON.yml @@ -0,0 +1,12 @@ +poseidonVersion: 2.7.1 +title: ForgePac20 +description: Empty package template. 
Please add a description +packageVersion: 0.1.0 +lastModified: 1970-01-01 +genotypeData: + format: PLINK + genoFile: ForgePac20.bed + snpFile: ForgePac20.bim + indFile: ForgePac20.fam + snpSet: Other +jannoFile: ForgePac20.janno diff --git a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs index c5da80ed..d896cc95 100644 --- a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs +++ b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs @@ -272,7 +272,6 @@ testPipelineInit testDir checkFilePath = do , "init_vcf" "Schiffels_vcf" "Schiffels.bib" ] - testPipelineValidate :: FilePath -> FilePath -> IO () testPipelineValidate testDir checkFilePath = do let validateOpts1 = ValidateOptions { @@ -1040,6 +1039,29 @@ testPipelineForge testDir checkFilePath = do "forge" "ForgePac19" "ForgePac19.janno" ] + -- the .janno file in Schmid_2028_utf8 includes various utf-8 encoding related issues + -- some old versions of trident failed on "testà" and "MAMS-47224 " + -- or turned it into "testÃ" and "MAMS-47224 " + let forgeOpts20 = ForgeOptions { + _forgeGenoSources = [PacBaseDir $ testPacsDirOther "Schmid_2028_utf8"] + , _forgeEntityInput = [] + , _forgeSnpFile = Nothing + , _forgeIntersect = False + , _forgeOutFormat = "PLINK" + , _forgeOutMode = NormalOut + , _forgeOutPacPath = testDir "forge" "ForgePac20" + , _forgeOutPacName = Just "ForgePac20" + , _forgePackageWise = False + , _forgeOutputPlinkPopMode = PlinkPopNameAsFamily + , _forgeOutputOrdered = False + } + let action20 = testLog (runForge forgeOpts20) >> patchLastModified testDir ("forge" "ForgePac20" "POSEIDON.yml") + runAndChecksumFiles checkFilePath testDir action20 "forge" [ + "forge" "ForgePac20" "POSEIDON.yml", + "forge" "ForgePac20" "ForgePac20.janno" + ] + + testPipelineChronicleAndTimetravel :: FilePath -> FilePath -> IO () testPipelineChronicleAndTimetravel testDir checkFilePath = do -- create relevant test directories diff --git 
a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/POSEIDON.yml b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/POSEIDON.yml new file mode 100644 index 00000000..d50da3cf --- /dev/null +++ b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/POSEIDON.yml @@ -0,0 +1,16 @@ +poseidonVersion: 2.6.0 +title: Schmid_2028 +description: Genetic data that will never be published in Schmid et al. 2028 +contributor: +- name: Clemens Schmid + email: schmid@institute.org +packageVersion: 1.0.0 +lastModified: 2023-01-12 +genotypeData: + format: EIGENSTRAT + genoFile: geno.txt + snpFile: snp.txt + indFile: ind.txt + snpSet: Other +jannoFile: Schmid_2028.janno + diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/Schmid_2028.janno b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/Schmid_2028.janno new file mode 100755 index 00000000..2c26f32d --- /dev/null +++ b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/Schmid_2028.janno @@ -0,0 +1,3 @@ +Poseidon_ID Group_Name Genetic_Sex Date_C14_Labnr +XXX001 POP1 F testà +XXX002 POP2 F MAMS-47224  diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/geno.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/geno.txt new file mode 100644 index 00000000..e5e735c6 --- /dev/null +++ b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/geno.txt @@ -0,0 +1,9 @@ +00 +00 +00 +00 +00 +00 +00 +00 +00 diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/ind.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/ind.txt new file mode 100644 index 00000000..5f99068f --- /dev/null +++ b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/ind.txt @@ -0,0 +1,2 @@ +XXX001 F POP1 +XXX002 F POP2 diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/snp.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/snp.txt new file mode 
100644 index 00000000..6a56b243 --- /dev/null +++ b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/snp.txt @@ -0,0 +1,9 @@ +1_752566 1 0.020130 752566 G A +1_842013 1 0.022518 842013 T G +1_891021 1 0.024116 891021 G A +1_949654 1 0.025727 949654 A G +2_1018704 2 0.026288 1018704 A G +2_1045331 2 0.026665 1045331 G A +2_1048955 2 0.026674 1048955 A G +2_1061166 2 0.026711 1061166 T C +2_1108637 2 0.028311 1108637 G A