Skip to content

Commit

Permalink
finished API and compiles. Not tested yet
Browse files Browse the repository at this point in the history
  • Loading branch information
stschiff committed Nov 30, 2023
1 parent cdaa4e5 commit 0e57116
Show file tree
Hide file tree
Showing 4 changed files with 131 additions and 52 deletions.
3 changes: 2 additions & 1 deletion poseidon-hs.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ library
Poseidon.CLI.Summarise, Poseidon.CLI.Validate, Poseidon.Utils,
Poseidon.CLI.Survey, Poseidon.CLI.Forge, Poseidon.CLI.Init,
Poseidon.CLI.Rectify, Poseidon.CLI.Fetch, Poseidon.CLI.Genoconvert,
Poseidon.CLI.OptparseApplicativeParsers, Poseidon.CLI.Timetravel
Poseidon.CLI.OptparseApplicativeParsers, Poseidon.CLI.Timetravel,
Poseidon.CLI.Jannocoalesce
other-modules: Paths_poseidon_hs
hs-source-dirs: src
build-depends: base >= 4.7 && < 5, sequence-formats>=1.6.1, text, time, pipes-safe,
Expand Down
39 changes: 27 additions & 12 deletions src-executables/Main-trident.hs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import Poseidon.CLI.Genoconvert (GenoconvertOptions (..
runGenoconvert)
import Poseidon.CLI.Init (InitOptions (..),
runInit)
import Poseidon.CLI.Jannocoalesce (JannoCoalesceOptions (..),
runJannocoalesce)
import Poseidon.CLI.List (ListOptions (..),
runList)
import Poseidon.CLI.OptparseApplicativeParsers
Expand Down Expand Up @@ -68,6 +70,7 @@ data Subcommand =
| CmdChronicle ChronicleOptions
| CmdTimetravel TimetravelOptions
| CmdServe ServeOptions
| CmdJannoCoalesce JannoCoalesceOptions

main :: IO ()
main = do
Expand All @@ -88,18 +91,20 @@ main = do

runCmd :: Subcommand -> PoseidonIO ()
runCmd o = case o of
CmdInit opts -> runInit opts
CmdList opts -> runList opts
CmdFetch opts -> runFetch opts
CmdForge opts -> runForge opts
CmdGenoconvert opts -> runGenoconvert opts
CmdSummarise opts -> runSummarise opts
CmdSurvey opts -> runSurvey opts
CmdRectify opts -> runRectify opts
CmdValidate opts -> runValidate opts
CmdChronicle opts -> runChronicle opts
CmdTimetravel opts -> runTimetravel opts
CmdServe opts -> runServerMainThread opts
-- alphabetic order
CmdChronicle opts -> runChronicle opts
CmdFetch opts -> runFetch opts
CmdForge opts -> runForge opts
CmdGenoconvert opts -> runGenoconvert opts
CmdJannoCoalesce opts -> runJannocoalesce opts
CmdInit opts -> runInit opts
CmdList opts -> runList opts
CmdRectify opts -> runRectify opts
CmdServe opts -> runServerMainThread opts
CmdSummarise opts -> runSummarise opts
CmdSurvey opts -> runSurvey opts
CmdTimetravel opts -> runTimetravel opts
CmdValidate opts -> runValidate opts

optParserInfo :: OP.ParserInfo Options
optParserInfo = OP.info (
Expand Down Expand Up @@ -131,6 +136,7 @@ subcommandParser = OP.subparser (
OP.command "fetch" fetchOptInfo <>
OP.command "forge" forgeOptInfo <>
OP.command "genoconvert" genoconvertOptInfo <>
OP.command "jannocoalesce" jannocoalesceOptInfo <>
OP.command "rectify" rectifyOptInfo <>
OP.commandGroup "Package creation and manipulation commands:"
) <|>
Expand Down Expand Up @@ -182,6 +188,8 @@ subcommandParser = OP.subparser (
(OP.progDesc "Construct package directories from chronicle files")
serveOptInfo = OP.info (OP.helper <*> (CmdServe <$> serveOptParser))
(OP.progDesc "Serve Poseidon packages via HTTP or HTTPS")
jannocoalesceOptInfo = OP.info (OP.helper <*> (CmdJannoCoalesce <$> jannocoalesceOptParser))
(OP.progDesc "Coalesce information from one or multiple janno files to another one")

initOptParser :: OP.Parser InitOptions
initOptParser = InitOptions <$> parseInGenotypeDataset
Expand Down Expand Up @@ -260,3 +268,10 @@ serveOptParser = ServeOptions <$> parseArchiveBasePaths
<*> parsePort
<*> parseIgnoreChecksums
<*> parseMaybeCertFiles

-- | Command line parser for the @jannocoalesce@ subcommand.
--
-- The individual option parsers are applied in the order of the
-- 'JannoCoalesceOptions' constructor arguments: source specification,
-- target file, optional output file, columns to fill and the override switch.
jannocoalesceOptParser :: OP.Parser JannoCoalesceOptions
jannocoalesceOptParser =
    JannoCoalesceOptions
        <$> parseJannocoalSourceSpec
        <*> parseJannocoalTargetFile
        <*> parseJannocoalOutSpec
        <*> parseJannocoalFillColumns
        <*> parseJannocoalOverride
44 changes: 31 additions & 13 deletions src/Poseidon/CLI/Jannocoalesce.hs
Original file line number Diff line number Diff line change
@@ -1,27 +1,44 @@
{-# LANGUAGE OverloadedStrings #-}
module Poseidon.CLI.Jannocoalesce where

import Poseidon.Janno (JannoRow(..), JannoRows(..), writeJannoFile, readJannoFile)
import Poseidon.Utils (PoseidonIO, PoseidonException(..))
import Poseidon.Janno (JannoRow (..), JannoRows (..),
readJannoFile, writeJannoFile)
import Poseidon.Package (PackageReadOptions (..),
defaultPackageReadOptions,
getJointJanno,
readPoseidonPackageCollection)
import Poseidon.Utils (PoseidonException (..), PoseidonIO)

import Control.Monad (forM)
import Control.Monad.Catch (throwM, MonadThrow)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HM
import Control.Monad (forM)
import Control.Monad.Catch (MonadThrow, throwM)
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as BSC
import qualified Data.Csv as Csv
import qualified Data.HashMap.Strict as HM

-- | Where the source janno rows are taken from: either one single janno
-- file, or a set of base directories as usual.
data JannoSourceSpec
    = JannoSourceSingle FilePath     -- ^ path to a single janno file
    | JannoSourceBaseDirs [FilePath] -- ^ list of base directories

data JannoCoalesceOptions = JannoCoalesceOptions
{ _jannocoalesceSource :: FilePath
{ _jannocoalesceSource :: JannoSourceSpec
, _jannocoalesceTarget :: FilePath
, _jannocoalesceOutSpec :: Maybe FilePath -- Nothing means "in place"
, _jannocoalesceFillColumns :: [String] -- empty list means All
, _jannocoalesceOverwriteColumns :: Bool
}

runJannocoalesce :: JannoCoalesceOptions -> PoseidonIO ()
runJannocoalesce (JannoCoalesceOptions source target outSpec fields overwrite) = do
JannoRows sourceRows <- readJannoFile source
runJannocoalesce (JannoCoalesceOptions sourceSpec target outSpec fields overwrite) = do
JannoRows sourceRows <- case sourceSpec of
JannoSourceSingle sourceFile -> readJannoFile sourceFile
JannoSourceBaseDirs sourceDirs -> do
let pacReadOpts = defaultPackageReadOptions {
_readOptIgnoreChecksums = True
, _readOptGenoCheck = False
, _readOptIgnoreGeno = True
, _readOptOnlyLatest = True
}
getJointJanno <$> readPoseidonPackageCollection pacReadOpts sourceDirs
JannoRows targetRows <- readJannoFile target
newJanno <- forM targetRows $ \targetRow -> do
let posId = jPoseidonID targetRow
Expand All @@ -30,7 +47,7 @@ runJannocoalesce (JannoCoalesceOptions source target outSpec fields overwrite) =
[] -> return targetRow
[keyRow] -> mergeRow targetRow keyRow fields overwrite
_ -> throwM $ PoseidonGenericException $ "source file contains multiple rows with key " ++ posId
let outPath = maybe target id outSpec
let outPath = maybe target id outSpec
liftIO $ writeJannoFile outPath (JannoRows newJanno)


Expand All @@ -43,8 +60,9 @@ mergeRow targetRow sourceRow fields overwrite = do
Left err -> throwM $ PoseidonGenericException $ "Janno row-merge error: " ++ err
Right r -> return r
where
mergeIfMissing :: BSC.ByteString -> BSC.ByteString -> BSC.ByteString -> BSC.ByteString
mergeIfMissing key targetVal sourceVal =
if (null key || (BSC.unpack key `elem` fields)) && (targetVal `elem` ["n/a", ""] || overwrite) then
if (null fields || (BSC.unpack key `elem` fields)) && (targetVal `elem` ["n/a", ""] || overwrite) then
sourceVal
else
targetVal
97 changes: 71 additions & 26 deletions src/Poseidon/CLI/OptparseApplicativeParsers.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,32 +2,37 @@

module Poseidon.CLI.OptparseApplicativeParsers where

import Poseidon.CLI.Chronicle (ChronOperation (..))
import Poseidon.CLI.List (ListEntity (..), RepoLocationSpec (..))
import Poseidon.CLI.Rectify (ChecksumsToRectify (..),
PackageVersionUpdate (..))
import Poseidon.CLI.Validate (ValidatePlan (..))
import Poseidon.Contributor (ContributorSpec (..),
contributorSpecParser)
import Poseidon.EntityTypes (EntitiesList, EntityInput (..),
PoseidonEntity, SignedEntitiesList,
SignedEntity, readEntitiesFromString)
import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFormatSpec (..),
SNPSetSpec (..))
import Poseidon.ServerClient (ArchiveEndpoint (..))
import Poseidon.Utils (LogMode (..), TestMode (..))
import Poseidon.Version (VersionComponent (..), parseVersion)

import Control.Applicative ((<|>))
import Data.List.Split (splitOn)
import Data.Version (Version)
import qualified Options.Applicative as OP
import SequenceFormats.Plink (PlinkPopNameMode (PlinkPopNameAsBoth, PlinkPopNameAsFamily, PlinkPopNameAsPhenotype))
import System.FilePath (dropExtension, takeExtension, (<.>))
import qualified Text.Parsec as P
import Text.Read (readMaybe)
import Poseidon.CLI.Chronicle (ChronOperation (..))
import Poseidon.CLI.Jannocoalesce (JannoSourceSpec (..))
import Poseidon.CLI.List (ListEntity (..),
RepoLocationSpec (..))
import Poseidon.CLI.Rectify (ChecksumsToRectify (..),
PackageVersionUpdate (..))
import Poseidon.CLI.Validate (ValidatePlan (..))
import Poseidon.Contributor (ContributorSpec (..),
contributorSpecParser)
import Poseidon.EntityTypes (EntitiesList, EntityInput (..),
PoseidonEntity, SignedEntitiesList,
SignedEntity,
readEntitiesFromString)
import Poseidon.GenotypeData (GenoDataSource (..),
GenotypeDataSpec (..),
GenotypeFormatSpec (..),
SNPSetSpec (..))
import Poseidon.ServerClient (ArchiveEndpoint (..))
import Poseidon.Utils (LogMode (..), TestMode (..))
import Poseidon.Version (VersionComponent (..),
parseVersion)

import Control.Applicative ((<|>))
import Data.List.Split (splitOn)
import Data.Version (Version)
import qualified Options.Applicative as OP
import SequenceFormats.Plink (PlinkPopNameMode (PlinkPopNameAsBoth, PlinkPopNameAsFamily, PlinkPopNameAsPhenotype))
import System.FilePath (dropExtension, takeExtension,
(<.>))
import qualified Text.Parsec as P
import Text.Read (readMaybe)


parseChronOperation :: OP.Parser ChronOperation
Expand Down Expand Up @@ -762,3 +767,43 @@ parseMaybeArchiveName = OP.option (Just <$> OP.str) (
OP.value Nothing <>
OP.showDefault
)

-- | Parser for the source of a jannocoalesce run.
--
-- Two alternatives are offered: a single janno file given via
-- @--sourceFile@ (yielding 'JannoSourceSingle'), or the result of
-- 'parseBasePaths' wrapped in 'JannoSourceBaseDirs'.
parseJannocoalSourceSpec :: OP.Parser JannoSourceSpec
parseJannocoalSourceSpec = singleSource <|> baseDirSource
  where
    -- one explicit janno file
    singleSource = JannoSourceSingle <$> OP.strOption (mconcat
        [ OP.long "sourceFile"
        , OP.metavar "FILE"
        , OP.help "The source Janno file"
        ])
    -- base directories, parsed by the shared base-path parser
    baseDirSource = JannoSourceBaseDirs <$> parseBasePaths

-- | Parser for the @--targetFile@ option: the path of the file to fill.
-- The option has no default value.
parseJannocoalTargetFile :: OP.Parser FilePath
parseJannocoalTargetFile = OP.strOption $ mconcat
    [ OP.long "targetFile"
    , OP.metavar "FILE"
    , OP.help "The target file to fill"
    ]

-- | Parser for the optional @--outFile@ option.
--
-- Yields 'Just' the given path when the option is present and 'Nothing'
-- (the declared default value) otherwise.
parseJannocoalOutSpec :: OP.Parser (Maybe FilePath)
parseJannocoalOutSpec = OP.option (fmap Just OP.str) $ mconcat
    [ OP.long "outFile"
    , OP.metavar "FILE"
    , OP.value Nothing
    , OP.showDefault
    , OP.help "An optional file to write the results to. If not specified, change the target file in place."
    ]

-- | Parser for the @--fillColumns@ option.
--
-- The argument is split on commas into a list of Janno field names. When the
-- option is not given, the result is the empty list, which stands for
-- "all columns".
--
-- This option deliberately has no short form: @-f@ is already taken by the
-- @--force@ switch ('parseJannocoalOverride'), and both parsers are combined
-- in the same subcommand parser, so a shared short name would be ambiguous.
parseJannocoalFillColumns :: OP.Parser [String]
parseJannocoalFillColumns = OP.option (splitOn "," <$> OP.str) (
    OP.long "fillColumns" <>
    OP.metavar "COLUMNS" <>
    OP.value [] <>
    OP.help "A comma-separated list of Janno field names. If not specified, fill all columns that can be found in the source and target."
    )

-- | Parser for the @--force@ / @-f@ switch.
--
-- 'False' unless the flag is present on the command line.
parseJannocoalOverride :: OP.Parser Bool
parseJannocoalOverride = OP.switch $ mconcat
    [ OP.long "force"
    , OP.short 'f'
    , OP.help "With this option, potential non-missing content in target columns gets overridden with non-missing content in source columns. By default, only missing data gets filled-in."
    ]

0 comments on commit 0e57116

Please sign in to comment.