diff --git a/gwastoolkit.conf b/gwastoolkit.conf index d2de717..54b0f45 100755 --- a/gwastoolkit.conf +++ b/gwastoolkit.conf @@ -8,11 +8,12 @@ SOFTWARE="/hpc/local/Rocky8/dhl_ec/software" ### REQUIRED: Path_to where GWASToolKit resides on the server. -GWASTOOLKITDIR="${SOFTWARE}/GWASToolKit" +# GWASTOOLKITDIR="${SOFTWARE}/GWASToolKit" +GWASTOOLKITDIR="/hpc/dhl_ec/tpeters/git_repos/GWASToolKit" ### REQUIRED: Path_to support programs on the server SNPTEST="${SOFTWARE}/snptest_v2.5.4" -PLINK2="${SOFTWARE}/plink19" +PLINK2="${SOFTWARE}/plink2" LOCUSZOOM13="${SOFTWARE}/locuszoom_1.3/bin/locuszoom" ### REQUIRED: SLURM settings -- these should work universally @@ -63,20 +64,22 @@ QTIMEGENECLEANER="01:00:00" # 1hours to clean ### 'FAIL' Mail is sent when the job fails. ### 'REQUEUE' Mail is sent when the job is re-queued; ### 'ALL' Mail sent for all the above. -YOUREMAIL="s.w.vanderlaan-2@umcutrecht.nl" +YOUREMAIL="t.s.peters-4@umcutrecht.nl" MAILSETTINGS="FAIL" ### ANALYSIS SETTINGS ### REQUIRED: Path_to where the main analysis directory resides. Make sure that it exists -PROJECTDIR="/hpc/dhl_ec/projects/SOMEDIR" +PROJECTDIR="/hpc/dhl_ec/tpeters/git_repos/GWASToolKit/test_output" ### REQUIRED: Name of the project, this will automatically be made. -PROJECTNAME="SOME_FANCY_PROJECTNAME" +PROJECTNAME="test_project" ### REQUIRED: Analysis settings. ### You can choose one of these options [GWAS/VARIANT/REGION/GENES]. -ANALYSIS_TYPE="VARIANT" +ANALYSIS_TYPE="GWAS" +### You can choose one of these options if GWAS is chosen [SNPTEST/REGENIE] +GWAS_TYPE="REGENIE" ### You can choose one of these options [AEGS/AAAGS/CTMM/UCORBIO/MYOMARKER/HELPFULL/RIVM]. 
STUDY_TYPE="AEGS" @@ -103,7 +106,8 @@ PHENOTYPE_FILE="${PROJECTDIR}/phenotypes.txt" # ### AEGS, TOPMed r3, f10, b38 - vcf.gz files # (note that b38 codes chromosomes as 'chr[#]', for example 'chr1') -IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_QC_imputation_2023/aegscombo/_topmed_r3_f10_b38/aegscombo.topmed_r3_f10_b38.split_norm_af_filter.chr" +# IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_QC_imputation_2023/aegscombo/_topmed_r3_f10_b38/aegscombo.topmed_r3_f10_b38.split_norm_af_filter.chr" +IMPUTEDDATA="/hpc/dhl_ec/tpeters/regenie_pgen/converted/aegscombo_topmed_r3_f10_b38.chr" ### AAAGS, 1000G phase 3, GoNL5 # IMPUTEDDATA="/hpc/dhl_ec/data/_aaa_originals/AAAGS_IMPUTE2_1000Gp3_GoNL5/aaags_1kGp3GoNL5_RAW_chr" @@ -127,7 +131,7 @@ IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_QC_imputation_2023/aegscombo/_t # SAMPLE_FILE="${PROJECTDIR}/20230523.PCSK9.AEGS123.males.sample" # SAMPLE_FILE="${PROJECTDIR}/20230523.PCSK9.AEGS123.sample" # SAMPLE_FILE_CHRX="${PROJECTDIR}/20230523.PCSK9.AEGS123.chrX.sample" -SAMPLE_FILE="${PROJECTDIR}/20240531.PCSK9.AEGS123.sample" +SAMPLE_FILE="/hpc/dhl_ec/tpeters/regenie_pgen/20240905.IPH_Binary.AEGS123.pheno" ### AAAGS # SAMPLE_FILE="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/aaags_phenocov.sample" @@ -222,3 +226,35 @@ REFERENCEDATA="${GWASTOOLKITDIR}/RESOURCES/1000Gp3v5_EUR/1000Gp3v5.20130502.EUR" ### - GENCODE based: gencode_v19_GRCh37_hg19_Feb2009.txt.gz ### - PLINK-style gene list: glist-hg19.gz HG19_GENES="${GWASTOOLKITDIR}/RESOURCES/glist-hg19.gz" + + +# REGENIE Addition +REGENIE="${SOFTWARE}/mambaforge3/envs/gwasregenie/bin/regenie" + +REGENIE_CALL_RATE="0.10" # PLINK call rate with the flag --geno +REGENIE_MAF="0.10" # PLINK MAF with the flag --maf +REGENIE_HWE="1e-3" # PLINK Hardy-Weinberg Equilibrium (HWE) with the flag --hwe +REGENIE_PRUNE="100 10 0.2" #Prune the data to only select independent SNPs (with low LD r^2) of one pair each with r^2 = 0.2 with the flags --indep-pairwise + 
+REGENIE_STEP1_BZISE="1000" +REGENIE_STEP2_BZISE="1000" + +IMPUTEDDATA_ALLCHR="/hpc/dhl_ec/tpeters/regenie_pgen/OUT/aegscombo_topmed_r3_f10_b38.allChrs" + +QMEMGWASREGENIE="16G" # 16Gb of memory for GWAS Regenie +QTIMEGWASREGENIE="00:15:00" # 15 minutes for GWAS Regenie +QMEMGWASREGENIE1="64G" # 64Gb of memory for GWAS Regenie (presumably step 1 -- confirm) +QTIMEGWASREGENIE1="2:00:00" # 2 hours for GWAS Regenie (presumably step 1 -- confirm) +QMEMGWASREGENIE2="64G" # 64Gb of memory for GWAS Regenie (presumably step 2 -- confirm) +QTIMEGWASREGENIE2="8:00:00" # 8 hours for GWAS Regenie (presumably step 2 -- confirm) +QMEMGWASREGENIEWRAP="16G" # 16Gb of memory for GWAS Regenie (presumably the wrapper -- confirm) +QTIMEGWASREGENIEWRAP="00:15:00" # 15 minutes for GWAS Regenie (presumably the wrapper -- confirm) + +EXCLUDE_RANGE_FILE="/hpc/dhl_ec/tpeters/regenie_pgen/exclude_problematic_range_b38.txt" + +COVARIATE_QUANTATIVE="Age,PC1,PC2,ORyear" +COVARIATE_BINARY="SEX" +# PHENOTYPE_QUANTATIVE="IPH_CLAM_prob,IPH_CLAM_area,IPH_CLAM_prob_rankNorm,IPH_CLAM_area_rankNorm" +# PHENOTYPE_QUANTATIVE="" +PHENOTYPE_BINARY="IPH,IPH_CLAM" +# PHENOTYPE_BINARY="" diff --git a/gwastoolkit.regenie.qc.sh b/gwastoolkit.regenie.qc.sh new file mode 100755 index 0000000..cdfcea0 --- /dev/null +++ b/gwastoolkit.regenie.qc.sh @@ -0,0 +1,190 @@ +#!/bin/bash + +### Creating display functions +### Setting colouring +NONE='\033[00m' +OPAQUE='\033[2m' +FLASHING='\033[5m' +BOLD='\033[1m' +ITALIC='\033[3m' +UNDERLINE='\033[4m' +STRIKETHROUGH='\033[9m' + +RED='\033[01;31m' +GREEN='\033[01;32m' +YELLOW='\033[01;33m' +PURPLE='\033[01;35m' +CYAN='\033[01;36m' +WHITE='\033[01;37m' + +function echobold { #'echobold' is the function name + echo -e "${BOLD}${1}${NONE}" # this is whatever the function needs to execute, note ${1} is the text for echo +} +function echoitalic { + echo -e "${ITALIC}${1}${NONE}" +} +function echonooption { + echo -e "${OPAQUE}${RED}${1}${NONE}" +} +function echoerrorflash { + echo -e "${RED}${BOLD}${FLASHING}${1}${NONE}" +} +function echoerror { + echo -e "${RED}${1}${NONE}" +} +# errors no option +function echoerrornooption { + echo -e "${YELLOW}${1}${NONE}" +} +function echoerrorflashnooption { + echo -e 
"${YELLOW}${BOLD}${FLASHING}${1}${NONE}" +} + +### MESSAGE FUNCTIONS +script_copyright_message() { + echo "" + THISYEAR=$(date +'%Y') + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "+ The MIT License (MIT) +" + echo "+ Copyright (c) 2015-${THISYEAR} Tim S. Peters +" + echo "+ +" + echo "+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +" + echo "+ associated documentation files (the \"Software\"), to deal in the Software without restriction, +" + echo "+ including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +" + echo "+ and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +" + echo "+ subject to the following conditions: +" + echo "+ +" + echo "+ The above copyright notice and this permission notice shall be included in all copies or substantial +" + echo "+ portions of the Software. +" + echo "+ +" + echo "+ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +" + echo "+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" + echo "+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +" + echo "+ OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +" + echo "+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +" + echo "+ +" + echo "+ Reference: http://opensource.org. +" + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +} +script_arguments_error() { + echoerror "$1" # ERROR MESSAGE + echoerror "" + echoerror "- Argument #1 is path_to the configuration file." 
+ echoerror "" + echoerror "An example command would be: gwastoolkit.regenie.qc.sh [arg1: path_to_configuration_file]" + echoerror "" + echoerror "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + # The wrong arguments are passed, so we'll exit the script now! + exit 1 +} + +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echobold " GWASTOOLKIT QUALITY CONTROL for REGENIE" +echobold " quality control of REGENIE analysis results" +echobold "" +echobold " Version : v1.0.0" +echobold "" +echobold " Last update: 2024-08-29" +echobold " Written by : Tim S. Peters (t.s.peters-4@umcutrecht.nl)." +echobold "" +echobold " Testers: - " +echobold "" +echobold " Description: Quality control of a REGENIE analysis:" +echobold " Filter on: - autosome snps" +echobold " - MAF" +echobold " - MAC" +echobold " - call rate" +echobold " - Hardy-Weinberg Equilibrium (HWE) p-value" +echobold " - LD r^2" +echobold " - exclude problamatic SNPs in long-range LD regions" +echobold "" +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + +### LOADING CONFIGURATION FILE +source "$1" # Depends on arg1. + +### REQUIRED | GENERALS +CONFIGURATIONFILE="$1" # Depends on arg1 -- but also on where it resides!!! + +### START of if-else statement for the number of command-line arguments passed ### +if [[ $# -lt 1 ]]; then + echo "Oh, computer says no! Number of arguments found "$#"." + script_arguments_error "You must supply [1] argument for cleaning of *** GENOME-WIDE ANALYSIS *** results!" + script_copyright_message + +else + ### Make and/or set the output directory + if [ ! -d ${PROJECTDIR}/${PROJECTNAME}/regenie_results ]; then + echo "The output directory does not exist. Making and setting it." 
+ mkdir -pv "${PROJECTDIR}/${PROJECTNAME}/regenie_results" || exit 1 # -p also creates the missing project directory; abort when the directory cannot be made + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + else + echo "The output directory already exists. Setting it." + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + fi + + if [ ! -d "${OUTPUT_DIR}/pre_processing" ]; then + echo "The QC output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/pre_processing" || exit 1 # without this directory every PLINK output below has nowhere to go + QC_OUTPUT_DIR=${OUTPUT_DIR}/pre_processing + else + echo "The QC output directory already exists. Setting it." + QC_OUTPUT_DIR=${OUTPUT_DIR}/pre_processing + fi + + echo "All arguments are passed. These are the settings:" + echo "The output directory is...................: ${OUTPUT_DIR}" + echo "The call rate filter is...................: ${REGENIE_CALL_RATE}" + echo "The minimum minor allele frequency is.....: ${REGENIE_MAF}" + echo "The minimum HWE p-value is................: ${REGENIE_HWE}" + echo "The filter for independent SNPs is........: ${REGENIE_PRUNE}" + echo "" + # START QUALITY CONTROL + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo " STARTING QUALITY CONTROL" + echo "" + echo "Please be patient...this can take a long time depending on the number of files." 
+ echo "We started at: "$(date) + echo "" + echo "" + echo "First, get a list of A/T and C/G SNPs, to exclude later:" + cat ${IMPUTEDDATA_ALLCHR}.pvar | \ + awk '($4 == "A" && $5 == "T") || ($4 == "T" && $5 == "A") || ($4 == "C" && $5 == "G") || ($4 == "G" && $5 == "C")' | \ + awk '{ print $3, $1, $2, 0, $4, $5 }' > ${QC_OUTPUT_DIR}/all.atcg.variants.txt + echo "" + echo "" + echo "Second, filtering data, using the following criteria: " + echo " * CALL RATE < ${REGENIE_CALL_RATE}" + echo " * MAF >= ${REGENIE_MAF}" + echo " * HWE <= ${REGENIE_HWE}" + echo " * indep_pairwise ${REGENIE_PRUNE}" + $PLINK2 --pfile ${IMPUTEDDATA_ALLCHR} \ + --autosome \ + --maf ${REGENIE_MAF} --geno ${REGENIE_CALL_RATE} --hwe ${REGENIE_HWE} \ + --exclude-if-info "R2>0.99" \ + --indep-pairwise ${REGENIE_PRUNE}\ + --exclude range ${EXCLUDE_RANGE_FILE} \ + --make-pgen --out ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.temp + echo "" + echo "" + echo "Third, prune out unwanted SNPs in high LD." + $PLINK2 --pfile ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.temp \ + --extract ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.temp.prune.in \ + --make-pgen --out ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.ultraclean.temp + echo "" + echo "" + echo "Fourth, remove the A/T and C/G SNPs." + $PLINK2 --pfile ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.ultraclean.temp \ + --exclude ${QC_OUTPUT_DIR}/all.atcg.variants.txt \ + --write-snplist --write-samples --no-id-header \ + --make-pgen --out ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC + echo "" + echo "" + echo "Finishing up..." 
+ rm -fv ${QC_OUTPUT_DIR}/*.temp* + echo "" + echo "" +### END of if-else statement for the number of command-line arguments passed ### +fi + +# script_copyright_message diff --git a/gwastoolkit.regenie.step1.sh b/gwastoolkit.regenie.step1.sh new file mode 100755 index 0000000..d05eab0 --- /dev/null +++ b/gwastoolkit.regenie.step1.sh @@ -0,0 +1,244 @@ +#!/bin/bash + +### Creating display functions +### Setting colouring +NONE='\033[00m' +OPAQUE='\033[2m' +FLASHING='\033[5m' +BOLD='\033[1m' +ITALIC='\033[3m' +UNDERLINE='\033[4m' +STRIKETHROUGH='\033[9m' + +RED='\033[01;31m' +GREEN='\033[01;32m' +YELLOW='\033[01;33m' +PURPLE='\033[01;35m' +CYAN='\033[01;36m' +WHITE='\033[01;37m' + +function echobold { #'echobold' is the function name + echo -e "${BOLD}${1}${NONE}" # this is whatever the function needs to execute, note ${1} is the text for echo +} +function echoitalic { + echo -e "${ITALIC}${1}${NONE}" +} +function echonooption { + echo -e "${OPAQUE}${RED}${1}${NONE}" +} +function echoerrorflash { + echo -e "${RED}${BOLD}${FLASHING}${1}${NONE}" +} +function echoerror { + echo -e "${RED}${1}${NONE}" +} +# errors no option +function echoerrornooption { + echo -e "${YELLOW}${1}${NONE}" +} +function echoerrorflashnooption { + echo -e "${YELLOW}${BOLD}${FLASHING}${1}${NONE}" +} + +### MESSAGE FUNCTIONS +script_copyright_message() { + echo "" + THISYEAR=$(date +'%Y') + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "+ The MIT License (MIT) +" + echo "+ Copyright (c) 2015-${THISYEAR} Tim S. 
Peters +" + echo "+ +" + echo "+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +" + echo "+ associated documentation files (the \"Software\"), to deal in the Software without restriction, +" + echo "+ including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +" + echo "+ and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +" + echo "+ subject to the following conditions: +" + echo "+ +" + echo "+ The above copyright notice and this permission notice shall be included in all copies or substantial +" + echo "+ portions of the Software. +" + echo "+ +" + echo "+ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +" + echo "+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" + echo "+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +" + echo "+ OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +" + echo "+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +" + echo "+ +" + echo "+ Reference: http://opensource.org. +" + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +} +script_arguments_error() { + echoerror "$1" # ERROR MESSAGE + echoerror "" + echoerror "- Argument #1 is path_to the configuration file." + echoerror "- Argument #2 is determining whether to run regenie for BINARY or QUANTATIVE traits." + echoerror "" + echoerror "An example command would be: gwastoolkit.regenie.step1.sh [arg1: path_to_configuration_file] [args2: BINARY/QUANTATIVE]" + echoerror "" + echoerror "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + # The wrong arguments are passed, so we'll exit the script now! 
+ exit 1 +} + +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echobold " GWASTOOLKIT REGENIE STEP 1" +echobold " step 1 of REGENIE analysis" +echobold "" +echobold " Version : v1.0.0" +echobold "" +echobold " Last update: 2024-08-29" +echobold " Written by : Tim S. Peters (t.s.peters-4@umcutrecht.nl)." +echobold "" +echobold " Testers: - " +echobold "" +echobold " Description: First step of GWAS using Regenie:" +echobold " Filter on: - autosome snps" +echobold " - MAF" +echobold " - MAC" +echobold " - call rate" +echobold " - Hardy-Weinberg Equilibrium (HWE) p-value" +echobold " - LD r^2" +echobold " - exclude problamatic SNPs in long-range LD regions" +echobold "" +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + +### LOADING CONFIGURATION FILE +source "$1" # Depends on arg1. + +### REQUIRED | GENERALS +CONFIGURATIONFILE="$1" # Depends on arg1 -- but also on where it resides!!! +TRAIT_TYPE="$2" # Depends on arg1 -- but also on where it resides!!! + +### START of if-else statement for the number of command-line arguments passed ### +if [[ $# -lt 2 ]]; then + echo "Oh, computer says no! Number of arguments found "$#"." + script_arguments_error "You must supply [2] arguments for cleaning of *** GENOME-WIDE ANALYSIS *** results!" + script_copyright_message + +else + if [[ "${TRAIT_TYPE}" != 'BINARY' && "${TRAIT_TYPE}" != 'QUANTATIVE' ]]; then + echo "Oh, computer says no! Second argument is not correct" + script_arguments_error "Argument 2 should be either 'BINARY' or 'QUANTATIVE'!" + script_copyright_message + exit 1 + fi + echo "" + echo "" + ### Make and/or set the output directory + if [ ! -d ${PROJECTDIR}/${PROJECTNAME}/regenie_results ]; then + echo "The output directory does not exist. Making and setting it." 
+ mkdir -pv "${PROJECTDIR}/${PROJECTNAME}/regenie_results" || exit 1 # -p also creates missing parents; abort when the directory cannot be made + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + else + echo "The output directory already exists. Setting it." + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + fi + echo "" + + if [ ! -d "${OUTPUT_DIR}/pre_processing" ]; then + echo "The QC output directory does not exist." + echo "Something probably went wrong in the QC step." + exit 1 + else + echo "The QC output directory exists. Setting it." + QC_OUTPUT_DIR=${OUTPUT_DIR}/pre_processing + fi + echo "" + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "* Running REGENIE for QUANTATIVE traits" + if [ ! -d "${OUTPUT_DIR}/regenie_QT_step1" ]; then + echo "The Regenie Quantative traits step1 output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/regenie_QT_step1" || exit 1 + STEP1_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step1 + else + echo "The Regenie Quantative traits step1 output directory already exists. Setting it." + STEP1_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step1 + fi + echo "" + fi + + if [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "* Running REGENIE for BINARY traits" + if [ ! -d "${OUTPUT_DIR}/regenie_BT_step1" ]; then + echo "The Regenie Binary traits step1 output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/regenie_BT_step1" || exit 1 + STEP1_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step1 + else + echo "The Regenie Binary traits step1 output directory already exists. Setting it." + STEP1_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step1 + fi + echo "" + fi + + if [ ! -d "${OUTPUT_DIR}/tmp_dir" ]; then + echo "The temporary directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/tmp_dir" || exit 1 # REGENIE --lowmem-prefix writes its scratch files here + TMP_OUTPUT_DIR=${OUTPUT_DIR}/tmp_dir + else + echo "The temporary directory already exists. Setting it." + TMP_OUTPUT_DIR=${OUTPUT_DIR}/tmp_dir + fi + echo "" + echo "" + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "All arguments are passed. 
These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: ${PHENOTYPE_QUANTATIVE}" + echo " * BINARY PHENOTYPE: -NOT USED-" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP1_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 1 for QUANTATIVE traits" + echo "" + $REGENIE \ + --step 1 \ + --pgen ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC \ + --extract ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC.snplist \ + --keep ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC.id \ + --covarFile ${SAMPLE_FILE} \ + --covarColList ${COVARIATE_QUANTATIVE} \ + --catCovarList ${COVARIATE_BINARY} \ + --phenoFile ${SAMPLE_FILE} \ + --phenoColList ${PHENOTYPE_QUANTATIVE} \ + --bsize="${REGENIE_STEP1_BZISE}" \ + --lowmem \ + --lowmem-prefix ${TMP_OUTPUT_DIR}/regenie_tmp_qt_preds \ + --out ${STEP1_QT_OUTPUT_DIR}/aegscombo_topmed_step1 + + elif [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "All arguments are passed. 
These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: -NOT USED-" + echo " * BINARY PHENOTYPE: ${PHENOTYPE_BINARY}" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP1_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 1 for BINARY traits" + echo "" + $REGENIE \ + --step 1 \ + --pgen ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC \ + --extract ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC.snplist \ + --keep ${QC_OUTPUT_DIR}/aegscombo_topmed_r3_f10_b38.allChrs.QC.id \ + --covarFile ${SAMPLE_FILE} \ + --covarColList ${COVARIATE_QUANTATIVE} \ + --catCovarList ${COVARIATE_BINARY} \ + --phenoFile ${SAMPLE_FILE} \ + --phenoColList ${PHENOTYPE_BINARY} \ + --bsize="${REGENIE_STEP1_BZISE}" \ + --lowmem --bt \ + --lowmem-prefix ${TMP_OUTPUT_DIR}/regenie_tmp_bt_preds \ + --out ${STEP1_BT_OUTPUT_DIR}/aegscombo_topmed_step1 + fi + +### END of if-else statement for the number of command-line arguments passed ### +fi + +# script_copyright_message diff --git a/gwastoolkit.regenie.step2.sh b/gwastoolkit.regenie.step2.sh new file mode 100755 index 0000000..c4704ee --- /dev/null +++ b/gwastoolkit.regenie.step2.sh @@ -0,0 +1,234 @@ +#!/bin/bash + +### Creating display functions +### Setting colouring +NONE='\033[00m' +OPAQUE='\033[2m' +FLASHING='\033[5m' +BOLD='\033[1m' +ITALIC='\033[3m' +UNDERLINE='\033[4m' +STRIKETHROUGH='\033[9m' + +RED='\033[01;31m' +GREEN='\033[01;32m' +YELLOW='\033[01;33m' +PURPLE='\033[01;35m' +CYAN='\033[01;36m' +WHITE='\033[01;37m' + +function echobold { #'echobold' is the function name + echo -e "${BOLD}${1}${NONE}" # this is whatever the function needs to execute, note ${1} is the text for echo +} +function echoitalic { + echo -e "${ITALIC}${1}${NONE}" +} +function echonooption { + echo -e "${OPAQUE}${RED}${1}${NONE}" +} +function echoerrorflash { + echo -e 
"${RED}${BOLD}${FLASHING}${1}${NONE}" +} +function echoerror { + echo -e "${RED}${1}${NONE}" +} +# errors no option +function echoerrornooption { + echo -e "${YELLOW}${1}${NONE}" +} +function echoerrorflashnooption { + echo -e "${YELLOW}${BOLD}${FLASHING}${1}${NONE}" +} + +### MESSAGE FUNCTIONS +script_copyright_message() { + echo "" + THISYEAR=$(date +'%Y') + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "+ The MIT License (MIT) +" + echo "+ Copyright (c) 2015-${THISYEAR} Tim S. Peters +" + echo "+ +" + echo "+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +" + echo "+ associated documentation files (the \"Software\"), to deal in the Software without restriction, +" + echo "+ including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +" + echo "+ and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +" + echo "+ subject to the following conditions: +" + echo "+ +" + echo "+ The above copyright notice and this permission notice shall be included in all copies or substantial +" + echo "+ portions of the Software. +" + echo "+ +" + echo "+ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +" + echo "+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" + echo "+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +" + echo "+ OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +" + echo "+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +" + echo "+ +" + echo "+ Reference: http://opensource.org. 
+" + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +} +script_arguments_error() { + echoerror "$1" # ERROR MESSAGE + echoerror "" + echoerror "- Argument #1 is path_to the configuration file." + echoerror "- Argument #2 is determining whether to run regenie for BINARY or QUANTATIVE traits." + echoerror "" + echoerror "An example command would be: gwastoolkit.regenie.step2.sh [arg1: path_to_configuration_file] [args2: BINARY/QUANTATIVE]" + echoerror "" + echoerror "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + # The wrong arguments are passed, so we'll exit the script now! + exit 1 +} + +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echobold " GWASTOOLKIT REGENIE STEP 2" +echobold " step 2 of REGENIE analysis" +echobold "" +echobold " Version : v1.0.0" +echobold "" +echobold " Last update: 2024-08-29" +echobold " Written by : Tim S. Peters (t.s.peters-4@umcutrecht.nl)." +echobold "" +echobold " Testers: - " +echobold "" +echobold "" +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + +### LOADING CONFIGURATION FILE +source "$1" # Depends on arg1. + +### REQUIRED | GENERALS +CONFIGURATIONFILE="$1" # Depends on arg1 -- but also on where it resides!!! +TRAIT_TYPE="$2" # Depends on arg1 -- but also on where it resides!!! + +### START of if-else statement for the number of command-line arguments passed ### +if [[ $# -lt 2 ]]; then + echo "Oh, computer says no! Number of arguments found "$#"." + script_arguments_error "You must supply [2] arguments for cleaning of *** GENOME-WIDE ANALYSIS *** results!" + script_copyright_message + +else + if [[ "${TRAIT_TYPE}" != 'BINARY' && "${TRAIT_TYPE}" != 'QUANTATIVE' ]]; then + echo "Oh, computer says no! 
Second argument is not correct" + script_arguments_error "Argument 2 should be either 'BINARY' or 'QUANTATIVE'!" + script_copyright_message + exit 1 + fi + echo "" + echo "" + ### Make and/or set the output directory + if [ ! -d "${PROJECTDIR}/${PROJECTNAME}/regenie_results" ]; then + echo "The output directory does not exist. Making and setting it." + mkdir -pv "${PROJECTDIR}/${PROJECTNAME}/regenie_results" || exit 1 # -p also creates missing parents; abort when the directory cannot be made + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + else + echo "The output directory already exists. Setting it." + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + fi + echo "" + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "* Running REGENIE for QUANTATIVE traits" + if [ ! -d "${OUTPUT_DIR}/regenie_QT_step2" ]; then + echo "The Regenie Quantative traits step2 output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/regenie_QT_step2" || exit 1 + STEP2_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step2 + else + echo "The Regenie Quantative traits step2 output directory already exists. Setting it." + STEP2_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step2 + fi + echo "" + + if [ ! -d "${OUTPUT_DIR}/regenie_QT_step1" ]; then + script_arguments_error "Regenie step 1 does not seem to be executed or the directory could not be found!" + script_copyright_message + exit 1 + else + echo "The Regenie Quantative traits step1 output directory exists. Setting it." + STEP1_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step1 + fi + echo "" + fi + + if [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "* Running REGENIE for BINARY traits" + if [ ! -d "${OUTPUT_DIR}/regenie_BT_step2" ]; then + echo "The Regenie Binary traits step2 output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/regenie_BT_step2" || exit 1 + STEP2_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step2 + else + echo "The Regenie Binary traits step2 output directory already exists. Setting it." + STEP2_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step2 + fi + echo "" + + if [ ! 
-d ${OUTPUT_DIR}/regenie_BT_step1 ]; then + script_arguments_error "Regenie step 1 does not seem to be executed or the directory could not be found!" + script_copyright_message + exit 1 + else + echo "The Regenie Binary traits step1 output directory exists. Setting it." + STEP1_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step1 + fi + echo "" + fi + + echo "" + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "All arguments are passed. These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: ${PHENOTYPE_QUANTATIVE}" + echo " * BINARY PHENOTYPE: -NOT USED-" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP2_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 2 for QUANTATIVE traits" + echo "" + for chr in {1..22}; do $REGENIE \ + --step 2 \ + --pgen ${IMPUTEDDATA}${chr} \ + --covarFile ${SAMPLE_FILE} \ + --covarColList ${COVARIATE_QUANTATIVE} \ + --catCovarList ${COVARIATE_BINARY} \ + --phenoFile ${SAMPLE_FILE} \ + --phenoColList ${PHENOTYPE_QUANTATIVE} \ + --firth --approx --pThresh 0.01\ + --pred ${STEP1_QT_OUTPUT_DIR}/aegscombo_topmed_step1_pred.list \ + --bsize="${REGENIE_STEP2_BZISE}" \ + --out ${STEP2_QT_OUTPUT_DIR}/aegscombo_step2_topmed_r3_f10_b38.chr${chr}; done + + elif [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "All arguments are passed. 
These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: -NOT USED-" + echo " * BINARY PHENOTYPE: ${PHENOTYPE_BINARY}" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP2_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 2 for BINARY traits" + echo "" + for chr in {1..22}; do $REGENIE \ + --step 2 \ + --pgen ${IMPUTEDDATA}${chr} \ + --covarFile ${SAMPLE_FILE} \ + --covarColList ${COVARIATE_QUANTATIVE} \ + --catCovarList ${COVARIATE_BINARY} \ + --phenoFile ${SAMPLE_FILE} \ + --phenoColList ${PHENOTYPE_BINARY} \ + --firth --approx --pThresh 0.01\ + --pred ${STEP1_BT_OUTPUT_DIR}/aegscombo_topmed_step1_pred.list \ + --bsize="${REGENIE_STEP2_BZISE}" \ + --bt \ + --out ${STEP2_BT_OUTPUT_DIR}/aegscombo_step2_topmed_r3_f10_b38.chr${chr}; done + fi + +### END of if-else statement for the number of command-line arguments passed ### +fi + +# script_copyright_message diff --git a/gwastoolkit.regenie.wrapper.sh b/gwastoolkit.regenie.wrapper.sh new file mode 100755 index 0000000..c07081d --- /dev/null +++ b/gwastoolkit.regenie.wrapper.sh @@ -0,0 +1,243 @@ +#!/bin/bash + +### Creating display functions +### Setting colouring +NONE='\033[00m' +OPAQUE='\033[2m' +FLASHING='\033[5m' +BOLD='\033[1m' +ITALIC='\033[3m' +UNDERLINE='\033[4m' +STRIKETHROUGH='\033[9m' + +RED='\033[01;31m' +GREEN='\033[01;32m' +YELLOW='\033[01;33m' +PURPLE='\033[01;35m' +CYAN='\033[01;36m' +WHITE='\033[01;37m' + +function echobold { #'echobold' is the function name + echo -e "${BOLD}${1}${NONE}" # this is whatever the function needs to execute, note ${1} is the text for echo +} +function echoitalic { + echo -e "${ITALIC}${1}${NONE}" +} +function echonooption { + echo -e "${OPAQUE}${RED}${1}${NONE}" +} +function echoerrorflash { + echo -e "${RED}${BOLD}${FLASHING}${1}${NONE}" +} +function echoerror { + echo -e "${RED}${1}${NONE}" +} +# errors no 
option +function echoerrornooption { + echo -e "${YELLOW}${1}${NONE}" +} +function echoerrorflashnooption { + echo -e "${YELLOW}${BOLD}${FLASHING}${1}${NONE}" +} + +### MESSAGE FUNCTIONS +script_copyright_message() { + echo "" + THISYEAR=$(date +'%Y') + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + echo "+ The MIT License (MIT) +" + echo "+ Copyright (c) 2015-${THISYEAR} Tim S. Peters +" + echo "+ +" + echo "+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and +" + echo "+ associated documentation files (the \"Software\"), to deal in the Software without restriction, +" + echo "+ including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, +" + echo "+ and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +" + echo "+ subject to the following conditions: +" + echo "+ +" + echo "+ The above copyright notice and this permission notice shall be included in all copies or substantial +" + echo "+ portions of the Software. +" + echo "+ +" + echo "+ THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT +" + echo "+ NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +" + echo "+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES +" + echo "+ OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +" + echo "+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +" + echo "+ +" + echo "+ Reference: http://opensource.org. +" + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +} +script_arguments_error() { + echoerror "$1" # ERROR MESSAGE + echoerror "" + echoerror "- Argument #1 is path_to the configuration file." 
+ echoerror "- Argument #2 is determining whether to run regenie for BINARY or QUANTATIVE traits." + echoerror "" + echoerror "An example command would be: gwastoolkit.regenie.wrapper.sh [arg1: path_to_configuration_file] [arg2: BINARY/QUANTATIVE]" + echoerror "" + echoerror "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + # The wrong arguments are passed, so we'll exit the script now! + exit 1 +} + +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" +echobold " GWASTOOLKIT REGENIE WRAPPER" +echobold " Wrapup of REGENIE analysis" +echobold "" +echobold " Version : v1.0.0" +echobold "" +echobold " Last update: 2024-08-29" +echobold " Written by : Tim S. Peters (t.s.peters-4@umcutrecht.nl)." +echobold "" +echobold " Testers: - " +echobold "" +echobold "" +echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + +### LOADING CONFIGURATION FILE +[[ $# -ge 2 ]] && source "$1" # Depends on arg1; only sourced when both arguments are given, otherwise the usage error below is shown. + +### REQUIRED | GENERALS +CONFIGURATIONFILE="$1" # Depends on arg1 -- but also on where it resides!!! +TRAIT_TYPE="$2" # Depends on arg2 -- must be 'BINARY' or 'QUANTATIVE' (validated below). + +### START of if-else statement for the number of command-line arguments passed ### +if [[ $# -lt 2 ]]; then + echo "Oh, computer says no! Number of arguments found "$#"." + script_arguments_error "You must supply [2] arguments for wrapping up *** REGENIE *** results!" + script_copyright_message + +else + if [[ "${TRAIT_TYPE}" != 'BINARY' && "${TRAIT_TYPE}" != 'QUANTATIVE' ]]; then + echo "Oh, computer says no! Second argument is not correct" + script_arguments_error "Argument 2 should be either 'BINARY' or 'QUANTATIVE'!" + script_copyright_message + exit 1 + fi + echo "" + echo "" + ### Make and/or set the output directory + if [ !
-d "${PROJECTDIR}/${PROJECTNAME}/regenie_results" ]; then + echo "The output directory does not exist. Making and setting it." + mkdir -pv "${PROJECTDIR}/${PROJECTNAME}/regenie_results" + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + else + echo "The output directory already exists. Setting it." + OUTPUT_DIR=${PROJECTDIR}/${PROJECTNAME}/regenie_results + fi + echo "" + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "* Running REGENIE for QUANTATIVE traits" + if [ ! -d "${OUTPUT_DIR}/regenie_QT_step2" ]; then + script_arguments_error "Regenie step 2 does not seem to be executed or the directory could not be found!" + script_copyright_message + exit 1 + else + echo "The Regenie Quantative traits step2 output directory already exists. Setting it." + STEP2_QT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_QT_step2 + fi + echo "" + fi + + if [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "* Running REGENIE for BINARY traits" + if [ ! -d "${OUTPUT_DIR}/regenie_BT_step2" ]; then + script_arguments_error "Regenie step 2 does not seem to be executed or the directory could not be found!" + script_copyright_message + exit 1 + else + echo "The Regenie Binary traits step2 output directory already exists. Setting it." + STEP2_BT_OUTPUT_DIR=${OUTPUT_DIR}/regenie_BT_step2 + fi + echo "" + fi + + if [ ! -d "${OUTPUT_DIR}/regenie_OUT" ]; then + echo "The Regenie output directory does not exist. Making and setting it." + mkdir -pv "${OUTPUT_DIR}/regenie_OUT" + REGENIE_FINAL_DIR=${OUTPUT_DIR}/regenie_OUT + else + echo "The Regenie output directory already exists. Setting it." + REGENIE_FINAL_DIR=${OUTPUT_DIR}/regenie_OUT + fi + echo "" + echo "" + + # Save the current IFS so it can be restored after the comma-separated phenotype list has been split + OLD_IFS=$IFS + + if [ "${TRAIT_TYPE}" == 'QUANTATIVE' ]; then + echo "All arguments are passed.
These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: ${PHENOTYPE_QUANTATIVE}" + echo " * BINARY PHENOTYPE: -NOT USED-" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP2_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 2 for QUANTATIVE traits" + echo "" + + IFS=',' # Set comma as the delimiter + for PHENOTYPE_ITEM in $PHENOTYPE_QUANTATIVE; do + echo "Wrapping up for QUANTATIVE PHENOTYPE: ${PHENOTYPE_ITEM}" + # create results file + ### 1 2 3 4 5 6 7 8 9 10 11 12 CALC 13 CALC 14 15 16 17 # AUTOSOMAL & X CHROMOSOMES + # echo "ALTID RSID CHR BP OtherAlleleA CodedAlleleB AvgMaxPostCall Info all_AA all_AB all_BB TotalN MAC MAF CAF HWE P BETA SE" > ${PHENO_OUTPUT_DIR}/${STUDY_TYPE}.${ANALYSIS_TYPE}.${REFERENCE}.${PHENOTYPE}.${EXCLUSION}.summary_results.txt + echo "CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ N TEST BETA SE CHISQ LOG10P EXTRA" > ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + + # Reset IFS to its original value + IFS=$OLD_IFS + for CHR in $(seq 1 22); do + # which chromosome are we processing? + echo "Processing chromosome ${CHR}..." + cat ${STEP2_QT_OUTPUT_DIR}/*.chr${CHR}_${PHENOTYPE_ITEM}.regenie | grep -v "#" | tail -n +2 | awk ' { print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 } ' >> ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + echo "/////////////////////////////////////////////////////////////////////////////////////////////////////////" + echo "" + done + echo "" + gzip -vf ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + done + + elif [ "${TRAIT_TYPE}" == 'BINARY' ]; then + echo "All arguments are passed. 
These are the settings:" + echo " * SAMPLE FILE: ${SAMPLE_FILE}" + echo " * QUANTATIVE COVARIATES: ${COVARIATE_QUANTATIVE}" + echo " * BINARY COVARIATES: ${COVARIATE_BINARY}" + echo " * QUANTATIVE PHENOTYPE: -NOT USED-" + echo " * BINARY PHENOTYPE: ${PHENOTYPE_BINARY}" + echo "" + echo " * BLOCK SIZE: ${REGENIE_STEP2_BZISE}" + echo "" + echo "" + echo "Start processing REGENIE step 2 for BINARY traits" + echo "" + + IFS=',' # Set comma as the delimiter + for PHENOTYPE_ITEM in $PHENOTYPE_BINARY; do + echo "Wrapping up for BINARY PHENOTYPE: ${PHENOTYPE_ITEM}" + # create results file + ### 1 2 3 4 5 6 7 8 9 10 11 12 CALC 13 CALC 14 15 16 17 # AUTOSOMAL & X CHROMOSOMES + # echo "ALTID RSID CHR BP OtherAlleleA CodedAlleleB AvgMaxPostCall Info all_AA all_AB all_BB TotalN MAC MAF CAF HWE P BETA SE" > ${PHENO_OUTPUT_DIR}/${STUDY_TYPE}.${ANALYSIS_TYPE}.${REFERENCE}.${PHENOTYPE}.${EXCLUSION}.summary_results.txt + echo "CHROM GENPOS ID ALLELE0 ALLELE1 A1FREQ N TEST BETA SE CHISQ LOG10P EXTRA" > ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + + # Reset IFS to its original value + IFS=$OLD_IFS + for CHR in $(seq 1 22); do + # which chromosome are we processing? + echo "Processing chromosome ${CHR}..." 
+ cat ${STEP2_BT_OUTPUT_DIR}/*.chr${CHR}_${PHENOTYPE_ITEM}.regenie | grep -v "#" | tail -n +2 | awk ' { print $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13 } ' >> ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + echo "/////////////////////////////////////////////////////////////////////////////////////////////////////////" + echo "" + done + echo "" + gzip -vf ${REGENIE_FINAL_DIR}/regenie.${PHENOTYPE_ITEM}.summary_results.txt + done + fi + +### END of if-else statement for the number of command-line arguments passed ### +fi + +# script_copyright_message diff --git a/gwastoolkit.run.sh b/gwastoolkit.run.sh index fa64708..30ed4d7 100755 --- a/gwastoolkit.run.sh +++ b/gwastoolkit.run.sh @@ -95,6 +95,22 @@ script_arguments_error_analysis_type() { exit 1 } +script_arguments_error_gwas_type() { + echo "$1" + echo "" + echo " *** ERROR *** ERROR --- $(basename "${0}") --- ERROR *** ERROR ***" + echo "" + echo " You must supply the correct argument for GWAS:" + echo " * [SNPTEST] -- genome-wide association study of traits in ${PHENOTYPE_FILE} using SNPTEST." + echo " * [REGENIE] -- genome-wide association study of traits in ${PHENOTYPE_FILE} using REGENIE." + echo "" + echo " Please refer to instruction above." + echo "" + echo "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" + # The wrong arguments are passed, so we'll exit the script now! + exit 1 +} + echobold "+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" echobold " GWASTOOLKIT" echobold " individual variant, per-gene, regional, or genome-wide association study of a trait" @@ -109,6 +125,7 @@ echobold " - Aisha Gohar (a.gohar@umcutrecht.nl)" echobold " - Jessica van Setten (j.vansetten@umcutrecht.nl)" echobold " - Tim Bezemer (t.bezemer-2@umcutrecht.nl)" echobold " - Lennart P.L. Landsmeer (l.p.l.landsmeer-2@umcutrecht.nl)" +echobold " - Tim S. 
Peters (t.s.peters-4@umcutrecht.nl)" echobold "" echobold " Description: Perform individual variant, regional or genome-wide association " echobold " analysis on some phenotype(s). It will do the following:" @@ -217,49 +234,152 @@ else ### SUBMIT SNPTEST_PHENO if [[ ${ANALYSIS_TYPE} = "GWAS" ]]; then - echo "Creating jobs to perform GWAS on your phenotype(s)..." - ### Sending analizer.sh to sbatch - ${GWASTOOLKITDIR}/gwastoolkit.analyzer.sh ${CONFIGURATIONFILE} ${DATE_TRACK} - ### Create QC bash-script to send to qsub - for PHENOTYPE in ${PHENOTYPES}; do + if [[ ${GWAS_TYPE} = "SNPTEST" ]]; then + echo "Creating jobs to perform SNPTEST GWAS on your phenotype(s)..." + ### Sending analizer.sh to sbatch + ${GWASTOOLKITDIR}/gwastoolkit.analyzer.sh ${CONFIGURATIONFILE} ${DATE_TRACK} + ### Create QC bash-script to send to qsub + for PHENOTYPE in ${PHENOTYPES}; do + + PHENO_OUTPUT_DIR=${PROJECT}/snptest_results/${PHENOTYPE} + + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.qc.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + ### Submit QC script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_QC=$(sbatch --parsable -J QC.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:$(squeue --noheader --format %i --name ANALYZER.DONE.${DATE_TRACK}) -o ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWAS} -t ${QTIMEGWAS} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + echo "" + + ### Create plotter bash-script to send to qsub + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.plotter.sh ${CONFIGURATIONFILE} 
${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + ### Submit plotter script + JOB_ID_PLOTTER=$(sbatch --parsable -J PLOTTER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:$(squeue --noheader --format %i --name ANALYZER.DONE.${DATE_TRACK}) -o ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASPLOT} -t ${QTIMEGWASPLOT} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + echo "" + + ### Create QC plotter bash-script to send to qsub + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.plotter.qc.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + ### Submit QC plotter script + JOB_ID_QCPLOTTER=$(sbatch --parsable -J QCPLOTTER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_QC} -o ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASPLOTQC} -t ${QTIMEGWASPLOTQC} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + echo "" + + #### Create clumper bash-script to send to qsub + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.clumper.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + #### Submit clumper script + JOB_ID_CLUMPER=$(sbatch --parsable -J CLUMPER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_QC} -o 
${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASCLUMP} -t ${QTIMEGWASCLUMP} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + echo "" + + ##### Create locuszoom bash-script to send to qsub + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.locuszoomer.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + ##### Submit locuszoom script + JOB_ID_LZ=$(sbatch --parsable -J LZ.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_CLUMPER} -o ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASLZOOM} -t ${QTIMEGWASLZOOM} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + + ##### Create cleaner bash-script to send to qsub + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.cleaner.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh + ##### Submit cleaner script + JOB_ID_CLEANER=$(sbatch --parsable -J CLEANER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_LZ} -o ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASCLEANER} -t ${QTIMEGWASCLEANER} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} 
${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) - PHENO_OUTPUT_DIR=${PROJECT}/snptest_results/${PHENOTYPE} + done - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.qc.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - ### Submit QC script - ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished - JOB_ID_QC=$(sbatch --parsable -J QC.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:$(squeue --noheader --format %i --name ANALYZER.DONE.${DATE_TRACK}) -o ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWAS} -t ${QTIMEGWAS} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) - echo "" + elif [[ ${GWAS_TYPE} = "REGENIE" ]]; then + echo "Creating jobs to perform REGENIE GWAS on your phenotype(s)..." 
- ### Create plotter bash-script to send to qsub - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.plotter.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - ### Submit plotter script - JOB_ID_PLOTTER=$(sbatch --parsable -J PLOTTER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:$(squeue --noheader --format %i --name ANALYZER.DONE.${DATE_TRACK}) -o ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASPLOT} -t ${QTIMEGWASPLOT} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/plotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) - echo "" + ### vcf.gz -> pgen ?? - ### Create QC plotter bash-script to send to qsub - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.plotter.qc.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - ### Submit QC plotter script - JOB_ID_QCPLOTTER=$(sbatch --parsable -J QCPLOTTER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_QC} -o ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASPLOTQC} -t ${QTIMEGWASPLOTQC} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/qcplotter.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) - echo "" + ### merge separate pgen chromosomes into one - #### Create clumper bash-script to send to qsub - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.clumper.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " >
${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - #### Submit clumper script - JOB_ID_CLUMPER=$(sbatch --parsable -J CLUMPER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_QC} -o ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASCLUMP} -t ${QTIMEGWASCLUMP} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/clumper.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + ### Create QC Pre-processing bash-script to send to sbatch + echo "SUBMITTING JOB: Regenie Quality Control (Pre-processing)" + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.qc.sh ${CONFIGURATIONFILE} " > ${PROJECT}/regenie.qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_QC=$(sbatch --parsable -J REGENIE.QC.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE} -t ${QTIMEGWASREGENIE} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.qc.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) echo "" - ##### Create locuszoom bash-script to send to qsub - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.locuszoomer.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - ##### Submit locuszoom script - JOB_ID_LZ=$(sbatch --parsable -J LZ.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_CLUMPER} -o 
${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASLZOOM} -t ${QTIMEGWASLZOOM} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/locuszoom.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) - - ##### Create cleaner bash-script to send to qsub - printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.cleaner.sh ${CONFIGURATIONFILE} ${PHENOTYPE} " > ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh - ##### Submit cleaner script - JOB_ID_CLEANER=$(sbatch --parsable -J CLEANER.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION} --depend=afterany:${JOB_ID_LZ} -o ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.log -e ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.errors --mem=${QMEMGWASCLEANER} -t ${QTIMEGWASCLEANER} --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PHENO_OUTPUT_DIR} ${PHENO_OUTPUT_DIR}/cleaner.${STUDY_TYPE}.${ANALYSIS_TYPE}.${PHENOTYPE}.${EXCLUSION}.sh) + ##### Running REGENIE for BINARY Traits + if [ -n "$PHENOTYPE_BINARY" ]; then + echo "*** BINARY Phenotypes found ***" + echo "- SUBMITTING JOB: Regenie Step 1" + ##### Regenie step 1 + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.step1.sh ${CONFIGURATIONFILE} BINARY " > ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_BT_STEP1=$(sbatch --parsable -J REGENIE.BT.STEP1.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_QC} -o ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e 
${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE1} -t ${QTIMEGWASREGENIE1} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_BT_STEP1=$(sbatch --parsable -J REGENIE.BT.STEP1.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE1} -t ${QTIMEGWASREGENIE1} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.BT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + + echo "- SUBMITTING JOB: Regenie Step 2" + ##### Regenie step 2 + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.step2.sh ${CONFIGURATIONFILE} BINARY " > ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_BT_STEP2=$(sbatch --parsable -J REGENIE.BT.STEP2.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_REGENIE_BT_STEP1} -o ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE2} -t ${QTIMEGWASREGENIE2} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_BT_STEP2=$(sbatch --parsable -J REGENIE.BT.STEP2.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE2} -t ${QTIMEGWASREGENIE2} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} 
${PROJECT}/regenie.BT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + + echo "- SUBMITTING JOB: Regenie Wrapper" + ##### Regenie wrapup + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.wrapper.sh ${CONFIGURATIONFILE} BINARY " > ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_BT_WRAPPER=$(sbatch --parsable -J REGENIE.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_REGENIE_BT_STEP2} -o ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIEWRAP} -t ${QTIMEGWASREGENIEWRAP} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_BT_WRAPPER=$(sbatch --parsable -J REGENIE.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIEWRAP} -t ${QTIMEGWASREGENIEWRAP} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.BT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + echo "" + else + echo "*** No BINARY Phenotypes give ***" + echo "" + fi + + ##### Running REGENIE for QUANTATIVE Traits + if [ -n "$PHENOTYPE_QUANTATIVE" ]; then + echo "*** QUANTATIVE Phenotypes found ***" + echo "- SUBMITTING JOB: Regenie Step 1" + ##### Regenie step 1 + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.step1.sh ${CONFIGURATIONFILE} QUANTATIVE " > ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not 
start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_QT_STEP1=$(sbatch --parsable -J REGENIE.QT.STEP1.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_QC} -o ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE1} -t ${QTIMEGWASREGENIE1} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_QT_STEP1=$(sbatch --parsable -J REGENIE.QT.STEP1.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE1} -t ${QTIMEGWASREGENIE1} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.step1.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + + echo "- SUBMITTING JOB: Regenie Step 2" + ##### Regenie step 2 + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.step2.sh ${CONFIGURATIONFILE} QUANTATIVE " > ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_QT_STEP2=$(sbatch --parsable -J REGENIE.QT.STEP2.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_REGENIE_QT_STEP1} -o ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE2} -t ${QTIMEGWASREGENIE2} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_QT_STEP2=$(sbatch --parsable -J REGENIE.QT.STEP2.${STUDY_TYPE}.${ANALYSIS_TYPE} -o 
${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIE2} -t ${QTIMEGWASREGENIE2} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.step2.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + + echo "- SUBMITTING JOB: Regenie Wrapper" + ##### Regenie wrapup + printf "%s\n" "#!/bin/bash" "#" "${GWASTOOLKITDIR}/gwastoolkit.regenie.wrapper.sh ${CONFIGURATIONFILE} QUANTATIVE " > ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh + ### Submit QC Pre-processing script + ### The option '-hold_jid' indicates that the following qsub will not start until all jobs with '-N SOMENAMEFORTHESCRIPT' are finished + JOB_ID_REGENIE_QT_WRAPPER=$(sbatch --parsable -J REGENIE.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE} --depend=afterany:${JOB_ID_REGENIE_QT_STEP2} -o ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIEWRAP} -t ${QTIMEGWASREGENIEWRAP} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + + # DEBUG + # JOB_ID_REGENIE_QT_WRAPPER=$(sbatch --parsable -J REGENIE.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE} -o ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.log -e ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.errors --mem=${QMEMGWASREGENIEWRAP} -t ${QTIMEGWASREGENIEWRAP} -c 8 --mail-user=${YOUREMAIL} --mail-type=${MAILSETTINGS} -D ${PROJECT} ${PROJECT}/regenie.QT.wrapper.${STUDY_TYPE}.${ANALYSIS_TYPE}.sh) + echo "" + echo "" + else + echo "No QUANTATIVE Phenotypes give" + echo "" + fi + else + ### If arguments are not met then this error message will be displayed + script_arguments_error_gwas_type - done + fi elif [[ ${ANALYSIS_TYPE} = "VARIANT" ]]; then