Merge branch 'master' into add_bib_listing

poseidon-framework · Dec 3, 2024 · 45f5982 · 45f5982
2 parents 597fa31 + 46b0043
commit 45f5982
Show file tree

Hide file tree

Showing 21 changed files with 200 additions and 43 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -25,7 +25,7 @@ jobs:
         with:
           target: stack:poseidon-tools-tests
       - name: Upload coverage to codecov
-        uses: codecov/codecov-action@v4
+        uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }} # set in organization settings
       # render and deploy haddock documentation

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,9 +1,19 @@
 - V 1.5.9.0:
     - Added a feature to list bibliography information via `trident list --bibliography`.
     - Added a new Server API `/bibliography` to serve bibliography information via HTTP.
+- V 1.5.7.4:
+    - Fixed a bug that broke the long-form genotype data input option (with `--genoFile + --snpFile + ...`).
+- V 1.5.7.3:
+    - Allowed `0` in the `Nr_SNPs` .janno column.
+- V 1.5.7.2:
+    - Fixed a bug introduced in Version 1.5.5.0, where input using the `-p` option (for example in `init`) would not behave correctly if the input files have multiple file endings, separated by dots.
+- V 1.5.7.1:
+    - Fixed a bug in the .janno reading triggered by trailing `à` characters and caused by premature whitespace trimming.
+    - Removed the hacky `removeNoBreakSpace` function from the .janno reading pipeline. It is not necessary any more.
+    - Added a golden test that ensures both changes perform as expected.
 - V 1.5.7.0:
     - Added support for VCF files (Variant Call Format) in Janno-packages.
-    - restructured test package structure, affecting some of the unit- and golden tests.
+    - Restructured test package structure, affecting some of the unit- and golden tests.
 - V 1.5.6.0:
     - Introduced individual `Janno...` types for every .janno column (except Poseidon_ID) in a new module `ColumnTypes`. This was done to improve .janno validation error messages.
     - Defined a typeclass `Makeable` with a function `make` to write smart constructors for the column types.

diff --git a/CHANGELOGRELEASE.md b/CHANGELOGRELEASE.md
@@ -1,5 +1,18 @@
+### V 1.5.7.3
+
+This patch release fixes three minor bugs, some of which were accidentally introduced with the big changes in v1.5.7.0.
+
+1. Fixed a bug in the .janno reading triggered by trailing `à` characters.
+2. Reverted unspecified behaviour: `0` is again allowed in the `Nr_SNPs` .janno column.
+3. Fixed a bug introduced in v1.5.5.0, where command line input using the `-p` option would not behave correctly if the input files have multiple file endings, separated by dots.
+
 ### V 1.5.7.0
 
+> [!WARNING]
+> On 2024/11/06 we realized that this release includes a breaking change that is not documented below.
+> The command line input interface for unpackaged genotype data was changed: what was previously `--inFormat EIGENSTRAT|PLINK + --genoFile + --snpFile + --indFile` is now either `--genoFile + --snpFile + --indFile` (EIGENSTRAT) or `--bedFile + --bimFile + --famFile` (PLINK). In other words, the format selection with the `--inFormat` argument was removed and replaced with separate file selectors for EIGENSTRAT and PLINK data.
+> This affects all `trident` subcommands that allow reading of unpackaged genotype data, namely `init`, `forge`, `genoconvert` and `validate`.
+
 This release further improves `.janno` parsing error messages and adds reading support for gzipped PLINK (`.bed` and `.bim`) and EIGENSTRAT (`.geno` and `.snp`) files. We also added (experimental) support for reading VCF files.
 
 #### Better .janno error messages

diff --git a/src/Poseidon/CLI/OptparseApplicativeParsers.hs b/src/Poseidon/CLI/OptparseApplicativeParsers.hs
@@ -37,8 +37,8 @@ import           Data.List.Split            (splitOn)
 import           Data.Version               (Version)
 import qualified Options.Applicative        as OP
 import           SequenceFormats.Plink      (PlinkPopNameMode (PlinkPopNameAsBoth, PlinkPopNameAsFamily, PlinkPopNameAsPhenotype))
-import           System.FilePath            (dropExtensions, takeExtensions,
-                                             (<.>))
+import           System.FilePath            (splitExtension, splitExtensions,
+                                             takeExtension, (<.>))
 import qualified Text.Parsec                as P
 import           Text.Read                  (readMaybe)
 
@@ -454,7 +454,9 @@ parseInGenoOne = OP.option (OP.eitherReader readGenoInput) (
                 \For VCF please see option --vcfFile")
     where
         readGenoInput :: FilePath -> Either String GenotypeFileSpec
-        readGenoInput p = makeGenoInput (dropExtensions p) (takeExtensions p)
+        readGenoInput p =
+            let (path, extension) = splitExtensionsOptGz p
+            in  makeGenoInput path extension
         makeGenoInput path ext
             | ext `elem` [".geno",    ".snp",   ".ind"] =
                 Right $ GenotypeEigenstrat (path <.> ".geno")    Nothing
@@ -474,6 +476,25 @@ parseInGenoOne = OP.option (OP.eitherReader readGenoInput) (
                                            (path <.> ".fam")     Nothing
             | otherwise = Left $ "unknown file extension: " ++ ext
 
+-- | A "smarter" combination of 'takeExtensions' and 'dropExtensions': it splits a file path
+-- at its last extension, or at its last two extensions if the last one is ".gz".
+-- This matters because users may submit files with multiple dots in their name, in which case
+-- 'takeExtensions' would return more than we need and the file-ending checks would erroneously fail.
+--
+-- > splitExtensionsOptGz "myFile.double.txt"    == ("myFile.double", ".txt")
+-- > splitExtensionsOptGz "myFile.gz"            == ("myFile", ".gz")
+-- > splitExtensionsOptGz "myFile.double.txt.gz" == ("myFile.double", ".txt.gz")
+splitExtensionsOptGz :: FilePath -> (FilePath, String)
+splitExtensionsOptGz fp =
+    case splitExtension fp of
+        -- the file ends with ".gz": also peel off the extension in front of it (if any)
+        -- and glue both back together, e.g. ".txt" and ".gz" become ".txt.gz"
+        (withoutGz, ".gz") ->
+            let (path, innerExtension) = splitExtension withoutGz
+            in  (path, innerExtension ++ ".gz")
+        -- any other (or no) ending: split at a single extension only
+        singleSplit -> singleSplit
+
 parseInGenoSep :: OP.Parser GenotypeFileSpec
 parseInGenoSep = parseEigenstrat <|> parsePlink <|> parseVCF
   where
@@ -484,7 +505,7 @@ parseInGenoSep = parseEigenstrat <|> parsePlink <|> parseVCF
         pure Nothing <*>
         parseFileWithEndings "Eigenstrat individual file" "indFile" [".ind"] <*>
         pure Nothing
-    parsePlink = GenotypeEigenstrat <$>
+    parsePlink = GenotypePlink <$>
         parseFileWithEndings "Plink genotype matrix, optionally gzipped" "bedFile" [".bed", ".bed.gz"] <*>
         pure Nothing <*>
         parseFileWithEndings "Plink snp positions file, optionally gzipped" "bimFile" [".bim",  ".bim.gz"] <*>
@@ -501,7 +522,9 @@ parseFileWithEndings help long endings = OP.option (OP.maybeReader fileEndingRea
     OP.metavar "FILE")
   where
     fileEndingReader :: String -> Maybe FilePath
-    fileEndingReader optString = if takeExtensions optString `elem` endings then Just (dropExtensions optString) else Nothing
+    fileEndingReader p =
+        let (_, extension) = splitExtensionsOptGz p
+        in if extension `elem` endings then Just p else Nothing
 
 parseGenotypeSNPSet :: OP.Parser SNPSetSpec
 parseGenotypeSNPSet = OP.option (OP.eitherReader readSnpSet) (

diff --git a/src/Poseidon/ColumnTypes.hs b/src/Poseidon/ColumnTypes.hs
@@ -436,8 +436,8 @@ instance Makeable JannoNrSNPs where
         case T.signed T.decimal x of
             Left e -> fail $ "Nr_SNPs can not be converted to Int because " ++ e
             Right (num, "") ->
-                if num < 1
-                then fail $ "Nr_SNPs " ++ show x ++ " lower than 1, which is not meaningful."
+                if num < 0
+                then fail $ "Nr_SNPs " ++ show x ++ " lower than 0, which is not meaningful."
                 else pure $ JannoNrSNPs num
             Right (_, rest) -> fail $ "Nr_SNPs can not be converted to Int, because of a trailing " ++ show rest
 instance Show JannoNrSNPs where          show (JannoNrSNPs x) = show x

diff --git a/src/Poseidon/ColumnTypesUtils.hs b/src/Poseidon/ColumnTypesUtils.hs
@@ -20,7 +20,7 @@ class Makeable a where
 parseTypeCSV :: forall a m. (MonadFail m, Makeable a, Typeable a) => String -> S.ByteString -> m a
 parseTypeCSV colname x = case T.decodeUtf8' x of
         Left e  -> fail $ show e ++ " in column " ++ colname
-        Right t -> make t
+        Right t -> make $ T.strip t
 
 -- template haskell function to generate repetitive instances
 makeInstances :: Name -> String -> DecsQ

diff --git a/src/Poseidon/Janno.hs b/src/Poseidon/Janno.hs
@@ -58,7 +58,7 @@ import qualified Control.Monad.Writer                 as W
 import           Data.Bifunctor                       (second)
 import qualified Data.ByteString.Char8                as Bchs
 import qualified Data.ByteString.Lazy.Char8           as Bch
-import           Data.Char                            (chr, isSpace, ord)
+import           Data.Char                            (chr, ord)
 import qualified Data.Csv                             as Csv
 import           Data.Either                          (lefts, rights)
 import qualified Data.HashMap.Strict                  as HM
@@ -275,31 +275,8 @@ filterLookupOptional m name = maybe (pure Nothing) Csv.parseField . cleanInput $
 
 cleanInput :: Maybe Bchs.ByteString -> Maybe Bchs.ByteString
 cleanInput Nothing           = Nothing
-cleanInput (Just rawInputBS) = transNA $ trimWS . removeNoBreakSpace $ rawInputBS
+cleanInput (Just rawInputBS) = transNA rawInputBS
     where
-        trimWS :: Bchs.ByteString -> Bchs.ByteString
-        trimWS = Bchs.dropWhile isSpace . Bchs.dropWhileEnd isSpace
-        removeNoBreakSpace :: Bchs.ByteString -> Bchs.ByteString
-        removeNoBreakSpace x
-            | not $ Bchs.isInfixOf "\194\160" x = x
-            | otherwise = removeNoBreakSpace $ (\(a,b) -> a <> Bchs.drop 2 b) $ Bchs.breakSubstring "\194\160" x
-            -- When a unicode string with the No-Break Space character is loaded, parsed
-            -- and written by cassava (in encodeByNameWith) it is unexpectedly expanded:
-            -- "MAMS-47224\194\160" becomes "MAMS-47224\195\130\194\160
-            -- This was surprisingly hard to fix. We decided to remove No-Break Space chars
-            -- entirely before parsing them.
-            -- Here are some resources to see, which unicode characters are actually in a string:
-            -- https://www.soscisurvey.de/tools/view-chars.php
-            -- https://qaz.wtf/u/show.cgi
-            -- https://onlineunicodetools.com/convert-unicode-to-bytes
-            -- The following code removes the characters \194 and \160 independently.
-            -- This breaks other unicode characters and therefore does not solve the problem
-            --Bchs.filter (\y -> y /= '\194' && y /= '\160') x -- \160 is No-Break Space
-            -- The following code allows to debug the issue more precisely
-            --let !a = unsafePerformIO $ putStrLn $ show x
-            --    b = ...
-            --    !c = unsafePerformIO $ putStrLn $ show b
-            --in b
         transNA :: Bchs.ByteString -> Maybe Bchs.ByteString
         transNA ""    = Nothing
         transNA "n/a" = Nothing

diff --git a/test/Poseidon/InterfaceSpec.hs b/test/Poseidon/InterfaceSpec.hs
@@ -9,6 +9,8 @@ import           Test.Hspec
 spec :: Spec
 spec = do
     testParseInGenoOne
+    testParseInGenoSep
+    testSplitExtensionsOptGz
 
 runParser :: OP.Parser a -> [String] -> Maybe a
 runParser p s = OP.getParseResult $ OP.execParserPure OP.defaultPrefs (OP.info p mempty) s
@@ -28,3 +30,45 @@ testParseInGenoOne = describe
                                     "path/to/file.bim.gz" Nothing
                                     "path/to/file.fam"    Nothing)
 
+-- | Tests for 'parseInGenoSep', the parser for genotype data given as separate file options.
+-- All paths are expected to come back exactly as they were entered, also for gzipped files
+-- that carry additional dots in their file name.
+testParseInGenoSep :: Spec
+testParseInGenoSep = describe "Poseidon.OptparseApplicativeParsers.parseInGenoSep" $ do
+    it "should return the expected paths for EIGENSTRAT data" $ do
+        let args     = [ "--genoFile", "path/to/file.test.geno.gz"
+                       , "--snpFile",  "path/to/file.snp"
+                       , "--indFile",  "path/to/file.ind" ]
+            expected = GenotypeEigenstrat "path/to/file.test.geno.gz" Nothing
+                                          "path/to/file.snp"          Nothing
+                                          "path/to/file.ind"          Nothing
+        runParser parseInGenoSep args `shouldBe` Just expected
+    it "should return the expected paths for PLINK data" $ do
+        let args     = [ "--bedFile", "path/to/file.test.bed.gz"
+                       , "--bimFile", "path/to/file.bim"
+                       , "--famFile", "path/to/file.fam" ]
+            expected = GenotypePlink "path/to/file.test.bed.gz" Nothing
+                                     "path/to/file.bim"         Nothing
+                                     "path/to/file.fam"         Nothing
+        runParser parseInGenoSep args `shouldBe` Just expected
+    -- only a single file option is passed for VCF
+    it "should return the expected paths for VCF data" $ do
+        let args     = ["--vcfFile", "path/to/file.vcf"]
+            expected = GenotypeVCF "path/to/file.vcf" Nothing
+        runParser parseInGenoSep args `shouldBe` Just expected
+
+-- | Unit tests for 'splitExtensionsOptGz': no ending, a single ending, and endings followed by ".gz".
+testSplitExtensionsOptGz :: Spec
+testSplitExtensionsOptGz = describe "Poseidon.OptparseApplicativeParsers.splitExtensionsOptGz" $ do
+    it "should split with empty ending" $
+        splitExtensionsOptGz "myFile_noEnding" `shouldBe` ("myFile_noEnding", "")
+    it "should return a single extension if not gz" $
+        splitExtensionsOptGz "myFile.txt" `shouldBe` ("myFile", ".txt")
+    it "...even if there are more dots" $
+        splitExtensionsOptGz "myFile.double.txt" `shouldBe` ("myFile.double", ".txt")
+    it "should return only gz if that's the only ending" $
+        splitExtensionsOptGz "myFile.gz" `shouldBe` ("myFile", ".gz")
+    it "should return two endings if ending with gz" $
+        splitExtensionsOptGz "myFile.txt.gz" `shouldBe` ("myFile", ".txt.gz")
+    it "even if there are more" $
+        splitExtensionsOptGz "myFile.double.txt.gz" `shouldBe` ("myFile.double", ".txt.gz")
diff --git a/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt b/test/PoseidonGoldenTests/GoldenTestCheckSumFile.txt
@@ -118,6 +118,8 @@ ad7e56177aad0a720f0bde13d47f2ac1 forge forge/ForgePac19/CHANGELOG.md
 8538ffd971ebb12cf5ef6e338da27970 forge forge/ForgePac19/ForgePac19.bim
 9b22cab26cdae87bd79d24c289e48433 forge forge/ForgePac19/ForgePac19.fam
 b7b649620cd37bd4a6d6f0f31c1c56da forge forge/ForgePac19/ForgePac19.janno
+b36b3ca509c235d0f15571c96195e801 forge forge/ForgePac20/POSEIDON.yml
+e375863bca9e4a91c9855396abde31c7 forge forge/ForgePac20/ForgePac20.janno
 d4a05cfef045648238a94a9d621cf667 chronicle chronicle/chronicle1.yml
 b43da4d5734371c0648553120f812466 timetravel timetravel/Lamnidis_2018-1.0.0/POSEIDON.yml
 8d57ce1a1ab28c0d8a5f391dd790a59c timetravel timetravel/Lamnidis_2018-1.0.1/POSEIDON.yml

diff --git a/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml b/test/PoseidonGoldenTests/GoldenTestData/chronicle/chronicle2.yml
@@ -1,29 +1,29 @@
 title: Chronicle title
 description: Chronicle description
 chronicleVersion: 0.2.0
-lastModified: 2024-10-25
+lastModified: 2024-10-31
 packages:
 - title: Lamnidis_2018
   version: 1.0.0
-  commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227
+  commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb
   path: Lamnidis_2018
 - title: Lamnidis_2018
   version: 1.0.1
-  commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227
+  commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb
   path: Lamnidis_2018_newVersion
 - title: Schiffels
   version: 1.1.1
-  commit: b5ecaa05ecbdfa85bd8fa3646e1680e0c88a4020
+  commit: c31c1842f859c42a35713c227c0ba589689d31aa
   path: Schiffels
 - title: Schiffels_2016
   version: 1.0.1
-  commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227
+  commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb
   path: Schiffels_2016
 - title: Schmid_2028
   version: 1.0.0
-  commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227
+  commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb
   path: Schmid_2028
 - title: Wang_2020
   version: 0.1.0
-  commit: e20fd40ca6aae8aafe97b3e60aa63a69045de227
+  commit: 0c11f86fe594dff1e8dc9b9c4c8568e8318f01bb
   path: Wang_2020
diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bed b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bed
@@ -0,0 +1 @@
+l
diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bim b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.bim
@@ -0,0 +1,9 @@
+1	1_752566	2.013e-2	752566	G	A
+1	1_842013	2.2518e-2	842013	T	G
+1	1_891021	2.4116e-2	891021	G	A
+1	1_949654	2.5727e-2	949654	A	G
+2	2_1018704	2.6288e-2	1018704	A	G
+2	2_1045331	2.6665e-2	1045331	G	A
+2	2_1048955	2.6674e-2	1048955	A	G
+2	2_1061166	2.6711e-2	1061166	T	C
+2	2_1108637	2.8311e-2	1108637	G	A
diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.fam b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.fam
@@ -0,0 +1,2 @@
+POP1	XXX001	0	0	2	0
+POP2	XXX002	0	0	2	0
diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.janno b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/ForgePac20.janno
@@ -0,0 +1,3 @@
+Poseidon_ID	Genetic_Sex	Group_Name	Alternative_IDs	Relation_To	Relation_Degree	Relation_Type	Relation_Note	Collection_ID	Country	Country_ISO	Location	Site	Latitude	Longitude	Date_Type	Date_C14_Labnr	Date_C14_Uncal_BP	Date_C14_Uncal_BP_Err	Date_BC_AD_Start	Date_BC_AD_Median	Date_BC_AD_Stop	Date_Note	MT_Haplogroup	Y_Haplogroup	Source_Tissue	Nr_Libraries	Library_Names	Capture_Type	UDG	Library_Built	Genotype_Ploidy	Data_Preparation_Pipeline_URL	Endogenous	Nr_SNPs	Coverage_on_Target_SNPs	Damage	Contamination	Contamination_Err	Contamination_Meas	Contamination_Note	Genetic_Source_Accession_IDs	Primary_Contact	Publication	Note	Keywords
+XXX001	F	POP1	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	testà	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	9	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a
+XXX002	F	POP2	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	MAMS-47224	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	9	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a	n/a
diff --git a/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/POSEIDON.yml b/test/PoseidonGoldenTests/GoldenTestData/forge/ForgePac20/POSEIDON.yml
@@ -0,0 +1,12 @@
+poseidonVersion: 2.7.1
+title: ForgePac20
+description: Empty package template. Please add a description
+packageVersion: 0.1.0
+lastModified: 1970-01-01
+genotypeData:
+  format: PLINK
+  genoFile: ForgePac20.bed
+  snpFile: ForgePac20.bim
+  indFile: ForgePac20.fam
+  snpSet: Other
+jannoFile: ForgePac20.janno
diff --git a/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs b/test/PoseidonGoldenTests/GoldenTestsRunCommands.hs
@@ -272,7 +272,6 @@ testPipelineInit testDir checkFilePath = do
         , "init_vcf" </> "Schiffels_vcf" </> "Schiffels.bib"
         ]
 
-
 testPipelineValidate :: FilePath -> FilePath -> IO ()
 testPipelineValidate testDir checkFilePath = do
     let validateOpts1 = ValidateOptions {
@@ -1040,6 +1039,29 @@ testPipelineForge testDir checkFilePath = do
           "forge" </> "ForgePac19" </> "ForgePac19.janno"
         ]
 
+    -- the .janno file in Schmid_2028_utf8 includes various utf-8 encoding related issues
+    -- some old versions of trident failed on "testà" and "MAMS-47224 "
+    -- or turned it into "testÃ" and "MAMS-47224Â "
+    let forgeOpts20 = ForgeOptions {
+          _forgeGenoSources  = [PacBaseDir $ testPacsDirOther </> "Schmid_2028_utf8"]
+        , _forgeEntityInput  = []
+        , _forgeSnpFile      = Nothing
+        , _forgeIntersect    = False
+        , _forgeOutFormat    = "PLINK"
+        , _forgeOutMode      = NormalOut
+        , _forgeOutPacPath   = testDir </> "forge" </> "ForgePac20"
+        , _forgeOutPacName   = Just "ForgePac20"
+        , _forgePackageWise  = False
+        , _forgeOutputPlinkPopMode = PlinkPopNameAsFamily
+        , _forgeOutputOrdered = False
+    }
+    let action20 = testLog (runForge forgeOpts20) >> patchLastModified testDir ("forge" </> "ForgePac20" </> "POSEIDON.yml")
+    runAndChecksumFiles checkFilePath testDir action20 "forge" [
+          "forge" </> "ForgePac20" </> "POSEIDON.yml",
+          "forge" </> "ForgePac20" </> "ForgePac20.janno"
+        ]
+
+
 testPipelineChronicleAndTimetravel :: FilePath -> FilePath -> IO ()
 testPipelineChronicleAndTimetravel testDir checkFilePath = do
     -- create relevant test directories

diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/POSEIDON.yml b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/POSEIDON.yml
@@ -0,0 +1,16 @@
+poseidonVersion: 2.6.0
+title: Schmid_2028
+description: Genetic data that will never be published in Schmid et al. 2028
+contributor:
+- name: Clemens Schmid
+  email: [email protected]
+packageVersion: 1.0.0
+lastModified: 2023-01-12
+genotypeData:
+  format: EIGENSTRAT
+  genoFile: geno.txt
+  snpFile: snp.txt
+  indFile: ind.txt
+  snpSet: Other
+jannoFile: Schmid_2028.janno
+
diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/Schmid_2028.janno b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/Schmid_2028.janno
@@ -0,0 +1,3 @@
+Poseidon_ID	Group_Name	Genetic_Sex	Date_C14_Labnr
+XXX001	POP1	F	testà
+XXX002	POP2	F	MAMS-47224 
diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/geno.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/geno.txt
@@ -0,0 +1,9 @@
+00
+00
+00
+00
+00
+00
+00
+00
+00
diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/ind.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/ind.txt
@@ -0,0 +1,2 @@
+XXX001 F POP1
+XXX002 F POP2
diff --git a/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/snp.txt b/test/testDat/testPackages/other_test_packages/Schmid_2028_utf8/snp.txt
@@ -0,0 +1,9 @@
+1_752566	1	0.020130	752566	G	A
+1_842013	1	0.022518	842013	T	G
+1_891021	1	0.024116	891021	G	A
+1_949654	1	0.025727	949654	A	G
+2_1018704	2	0.026288	1018704	A	G
+2_1045331	2	0.026665	1045331	G	A
+2_1048955	2	0.026674	1048955	A	G
+2_1061166	2	0.026711	1061166	T	C
+2_1108637	2	0.028311	1108637	G	A