Skip to content

Commit

Permalink
fix references to software in configuration file
Browse files Browse the repository at this point in the history
  • Loading branch information
swvanderlaan committed Sep 12, 2023
1 parent e183d9d commit cd5445e
Showing 1 changed file with 72 additions and 139 deletions.
211 changes: 72 additions & 139 deletions gwastoolkit.conf
Original file line number Diff line number Diff line change
@@ -1,24 +1,22 @@
### CONFIGURATION FILE FOR GWASTOOLKIT ###
# Precede your comments with a #-sign.
#
# Set the directory variables, the order doesn't matter.
# Don't end the directory variables with '/' (forward-slash)!
### Precede your comments with a #-sign.
###
### Set the directory variables, the order doesn't matter.
### Don't end the directory variables with '/' (forward-slash)!

# REQUIRED: Path_to where the software resides on the server.
SOFTWARE="/hpc/local/CentOS7/dhl_ec/software"
### REQUIRED: Path_to where the software resides on the server.
SOFTWARE="/hpc/local/Rocky8/dhl_ec/software"

# REQUIRED: Path_to where GWASToolKit resides on the server.
### REQUIRED: Path_to where GWASToolKit resides on the server.
GWASTOOLKITDIR="${SOFTWARE}/GWASToolKit"

# REQUIRED: Path_to support programs on the server
# SNPTEST="${SOFTWARE}/snptest_v2.5.2_CentOS6.5_x86_64_static/snptest_v2.5.2"
# SNPTEST="${SOFTWARE}/snptest_v2.5.4-beta3_linux_x86_64_static/snptest_v2.5.4-beta3"
SNPTEST="${SOFTWARE}/snptest_v2.5.6_CentOS_Linux7.8-x86_64_dynamic/snptest_v2.5.6"
PLINK2="${SOFTWARE}/plink_v1.9"
LOCUSZOOM13="/hpc/local/CentOS7/dhl_ec/software/locuszoom_1.3/bin/locuszoom"
### REQUIRED: Path_to support programs on the server
SNPTEST="${SOFTWARE}/snptest_v2.5.6_CentOS_Linux7.8.2003-x86_64_dynamic/snptest_v2.5.6"
PLINK2="${SOFTWARE}/plink_v1.90_beta7_20230116"
LOCUSZOOM13="${SOFTWARE}/locuszoom_1.3/bin/locuszoom"

# REQUIRED: SLURM settings -- these should work universally
# FOR GWAS
### REQUIRED: SLURM settings -- these should work universally
### FOR GWAS
QMEMGWAS="8G" # '8Gb' for GWAS
QTIMEGWAS="12:00:00" # 12 hours for GWAS
### NOTE(review): the value below is 164G, but its comment says 16Gb -- confirm which one is intended.
QMEMGWASCLUMP="164G" # 16Gb needed for clumping
Expand All @@ -36,19 +34,19 @@ QTIMEVAR="00:15:00" # 15mins for variants
QMEMVARCLEANER="4G" # 4Gb needed for cleaner
QTIMEVARCLEANER="01:00:00" # 1 hour to clean

# FOR VARIANT
### FOR VARIANT
### Memory (QMEM*) and time (QTIME*, HH:MM:SS) settings for the per-variant jobs and their cleaner step.
QMEMVAR="8G" # 8Gb for variants
QTIMEVAR="00:15:00" # 15mins for variants
QMEMVARCLEANER="4G" # 4Gb needed for cleaner
QTIMEVARCLEANER="01:00:00" # 1 hour to clean

# FOR REGION
### FOR REGION
### Memory (QMEM*) and time (QTIME*, HH:MM:SS) settings for the regional jobs and their cleaner step.
QMEMREG="8G" # 8Gb for regions
QTIMEREG="00:30:00" # 30mins for regions
QMEMREGCLEANER="4G" # 4Gb needed for cleaner
QTIMEREGCLEANER="01:00:00" # 1 hour to clean

# FOR GENE
### FOR GENE
QMEMGENE="8G" # 8Gb for genes
QTIMEGENE="00:30:00" # 30 minutes for genes
QMEMGENEQC="4G" # 4 Gb for snptest qc
Expand All @@ -58,53 +56,52 @@ QTIMEGENELZOOM="00:15:00" #15mins for locuszoom
QMEMGENECLEANER="4G" # 4Gb needed for cleaner
QTIMEGENECLEANER="01:00:00" # 1 hour to clean

# REQUIRED: mailing settings
# you're e-mail address; you'll get an email when the job has ended or when it was aborted
# 'BEGIN' Mail is sent at the beginning of the job;
# 'END' Mail is sent at the end of the job;
# 'FAIL' Mail is sent when the job fails.
# 'REQUEUE' Mail is sent when the job is re-queued;
# 'ALL' Mail sent for all the above.
### REQUIRED: mailing settings
### Your e-mail address; you'll get an email when the job has ended or when it was aborted
### 'BEGIN' Mail is sent at the beginning of the job;
### 'END' Mail is sent at the end of the job;
### 'FAIL' Mail is sent when the job fails.
### 'REQUEUE' Mail is sent when the job is re-queued;
### 'ALL' Mail sent for all the above.
YOUREMAIL="[email protected]"
MAILSETTINGS="FAIL"


# ANALYSIS SETTINGS
# REQUIRED: Path_to where the main analysis directory resides. Make sure that it exists
### ANALYSIS SETTINGS
### REQUIRED: Path_to where the main analysis directory resides. Make sure that it exists
PROJECTDIR="/hpc/dhl_ec/svanderlaan/projects/SOMEDIR"

# REQUIRED: Name of the project, this will automatically be made.
### REQUIRED: Name of the project, this will automatically be made.
PROJECTNAME="SOME_FANCY_PROJECTNAME"

# REQUIRED: Analysis settings.
# You can choose one of these options [GWAS/VARIANT/REGION/GENES].
### REQUIRED: Analysis settings.
### You can choose one of these options [GWAS/VARIANT/REGION/GENES].
ANALYSIS_TYPE="VARIANT"
# You can choose one of these options [AEGS/AAAGS/CTMM/UCORBIO/MYOMARKER/HELPFULL/RIVM].
### You can choose one of these options [AEGS/AAAGS/CTMM/UCORBIO/MYOMARKER/HELPFULL/RIVM].
STUDY_TYPE="AEGS"

# REQUIRED
# Indicate the file extension used for the genetic data [bgen, gen, gen.gz, vcf, vcf.gz]
### REQUIRED
### Indicate the file extension used for the genetic data [bgen, gen, gen.gz, vcf, vcf.gz]
GENETICEXTENSION="vcf.gz"

# REQUIRED: give a list of covariates in a file
# Example covariate-list format:
# COHORT Age sex PC1_2013 PC2_2013 PC3_2013 PC4_2013 PC5_2013 PC6_2013 PC7_2013 PC8_2013 PC9_2013 PC10_2013
### REQUIRED: give a list of covariates in a file
### Example covariate-list format:
### COHORT Age sex PC1 PC2
COVARIATE_FILE="${PROJECTDIR}/covariates.txt"

# REQUIRED: give a list of phenotypes to be analyzed
### REQUIRED: give a list of phenotypes to be analyzed
PHENOTYPE_FILE="${PROJECTDIR}/phenotypes.txt"

# SPECIFIC DATA SETTINGS
#
# REQUIRED: location of [imputed] data to use -- all BGEN-format.
### SPECIFIC DATA SETTINGS
### REQUIRED: location of [imputed] data to use -- bgen or vcf.gz files, as noted per dataset below.
#
# ### AEGS, 1000G phase 3, GoNL5 - bgen files
### AEGS, 1000G phase 3, GoNL5 - bgen files
# IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_IMPUTE2_1000Gp3_GoNL5/aegs_combo_1kGp3GoNL5_RAW_chr"
# ### AEGS, 1000G phase 1
### AEGS, 1000G phase 1
# IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_IMPUTE2_BBMRI_1000Gp1v3/aegs_combo_1000g_RAW_chr"
# ### AEGS, GoNL4
### AEGS, GoNL4
# IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_IMPUTE2_BBMRI_GoNL4/aegs_combo_gonl4_RAW_chr"
# ### AEGS, 1000G phase 3 and HRC r1.1 combined (Michigan Imputation Server) - vcf.gz files
### AEGS, 1000G phase 3 and HRC r1.1 combined (Michigan Imputation Server) - vcf.gz files
IMPUTEDDATA="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_EAGLE2_1000Gp3v5HRCr11/aegs.qc.1kgp3hrcr11.chr"
IMPUTEDDATA_CHRX="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_EAGLE2_1000Gp3v5HRCr11/_chr23_1kg_gonl5/aegs.1kgp3gonl5.chr"

Expand All @@ -115,23 +112,23 @@ IMPUTEDDATA_CHRX="/hpc/dhl_ec/data/_ae_originals/AEGS_COMBINED_EAGLE2_1000Gp3v5H
### AAAGS, HRC r1.1 (Michigan Imputation Server)
# IMPUTEDDATA="/hpc/dhl_ec/data/_aaa_originals/AAAGS_EAGLE2_HRC_r11_2016/aaags.hrc_r11_2016.chr"

# ### CTMMGS, 1000G phase 3, GoNL5
### CTMMGS, 1000G phase 3, GoNL5
# IMPUTEDDATA="/hpc/dhl_ec/data/_ctmm_originals/CTMMAxiomTX_IMPUTE2_1000Gp3_GoNL5/ctmm_1kGp3GoNL5_RAW_chr"
### CTMMGS, 1000G phase 3 (Michigan Imputation Server)
# IMPUTEDDATA="/hpc/dhl_ec/data/_ctmm_originals/CTMMAxiomTX_EAGLE2_1000Gp3/ctmmgs.1kgp3.chr"
### CTMMGS, HRC r1.1 (Michigan Imputation Server)
# IMPUTEDDATA="/hpc/dhl_ec/data/_ctmm_originals/CTMMAxiomTX_EAGLE2_HRC_r11_2016/ctmmgs.hrc_r11_2016.chr"

# REQUIRED: location of sample file.
### REQUIRED: location of sample file.
#
# ### AEGS
### AEGS
SAMPLE_FILE="/hpc/dhl_ec/svanderlaan/projects/SOMEDIR/SNP/20201105.LOOKUP.AEGS123.sample"
SAMPLE_FILE_CHRX="/hpc/dhl_ec/svanderlaan/projects/SOMEDIR/SNP/20201105.LOOKUP.AEGS123.chrX.sample"

# ### AAAGS
### AAAGS
# SAMPLE_FILE="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/aaags_phenocov.sample"

# ### CTMMGS
### CTMMGS
# SAMPLE_FILE="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/ctmm_phenocov.sample"

### REQUIRED: exclusion criteria according to the format "-[in/ex]clude_samples_where <name> [=|==|!=] <value>"
Expand All @@ -141,128 +138,64 @@ SAMPLE_FILE_CHRX="/hpc/dhl_ec/svanderlaan/projects/SOMEDIR/SNP/20201105.LOOKUP.A
### SampleID123Y
### SampleID123Z

### DEFAULT
### REQUIRED: exclusion requirement; DEFAULT
EXCLUSION_CRITERIA="-exclude_samples_where \"SELECTION\"==\"not_selected\" "

# ### AEGS specific exclusion lists
### REQUIRED: provide specific exclusion description, no space, all capitals
EXCLUSION="EXCL_DEFAULT"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA.list
# EXCLUSION="EXCL_FEMALES"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_Females.list
# EXCLUSION="EXCL_MALES"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_Males.list
# EXCLUSION="EXCL_CKD"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_CKD.list
# EXCLUSION="EXCL_NONCKD"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_nonCKD.list
# EXCLUSION="EXCL_T2D"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_T2D.list
# EXCLUSION="EXCL_NONT2D"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_nonT2D.list
# EXCLUSION="EXCL_SMOKER"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_SMOKER.list
# EXCLUSION="EXCL_NONSMOKER"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_nonSMOKER.list
# EXCLUSION="EXCL_PRE2007"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_pre2007.list
# EXCLUSION="EXCL_POST2007"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_post2007.list
# EXCLUSION="EXCL_DIURETICS"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_DIURETICS.list
# EXCLUSION="EXCL_NONDIURETICS"
# EXCLUSION_LIST=/hpc/dhl_ec/data/_ae_originals/pheno_cov_exclusions/exclusion_nonCEA_nonDIURETICS.list

# ### AAAGS specific exclusion lists
# EXCLUSION="EXCL_DEFAULT"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS.list"
# EXCLUSION="EXCL_FEMALES"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_Females.list"
# EXCLUSION="EXCL_MALES"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_Males.list"
# EXCLUSION="EXCL_CKD" -- does not exist yet
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_CKD.list"
# EXCLUSION="EXCL_NONCKD" -- does not exist yet
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_nonCKD.list"
# EXCLUSION="EXCL_T2D"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_T2D.list"
# EXCLUSION="EXCL_NONT2D"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_nonT2D.list"
# EXCLUSION="EXCL_SMOKER"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_SMOKER.list"
# EXCLUSION="EXCL_NONSMOKER"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_nonSMOKER.list"
# EXCLUSION="EXCL_DIURETICS"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_DIURETICS.list"
# EXCLUSION="EXCL_NONDIURETICS"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGS_nonDIURETICS.list"
# EXCLUSION="EXCL_DEFAULT_NONAAA"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_aaa_originals/pheno_cov_exclusions/exclusion_nonAAAGSnonAAA.list"

### CTMMGS specific exclusion lists
# EXCLUSION="EXCL_DEFAULT"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM.list"
# EXCLUSION="EXCL_FEMALES"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_FEMALES.list"
# EXCLUSION="EXCL_MALES"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_MALES.list"
# EXCLUSION="EXCL_CKD" -- does not exist yet
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_CKD.list"
# EXCLUSION="EXCL_NONCKD" -- does not exist yet
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_nonCKD.list"
# EXCLUSION="EXCL_T2D"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_T2D.list"
# EXCLUSION="EXCL_NONT2D"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_nonT2D.list"
# EXCLUSION="EXCL_SMOKER"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_SMOKER.list"
# EXCLUSION="EXCL_NONSMOKER"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_nonSMOKER.list"
# EXCLUSION="EXCL_DIURETICS"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_DIURETICS.list"
# EXCLUSION="EXCL_NONDIURETICS"
# EXCLUSION_LIST="/hpc/dhl_ec/data/_ctmm_originals/pheno_cov_exclusions/exclusion_nonCTMM_nonDIURETICS.list"

# REQUIRED: ANALYSIS SPECIFIC ARGUMENTS
# For per-variant analysis
# EXAMPLE FORMAT
# rs1234 1 12345567
# rs5678 2 12345567
# rs4321 14 12345567
# rs9876 20 12345567
### REQUIRED: ANALYSIS SPECIFIC ARGUMENTS
### For per-variant analysis
### EXAMPLE FORMAT
### rs1234 1 12345567
### rs5678 2 12345567
### rs4321 14 12345567
### rs9876 20 12345567
VARIANTLIST="${PROJECTDIR}/variantlist.txt"

# REQUIRED: For GWAS, GENE, REGIONAL, and VARIANT analyses -- options: [STANDARDIZE/RAW]
### REQUIRED: For GWAS, GENE, REGIONAL, and VARIANT analyses -- options: [STANDARDIZE/RAW]
STANDARDIZE="RAW"
# REQUIRED: You can choose one of these method options [expected/score/newml] -- expected is likely best;
# refer to SNPTEST documentation and more method options.
# If you choose `-method newml`, you must supply the baseline-phenotype to which the other
# discrete phenotypes are compared.
### REQUIRED: You can choose one of these method options [expected/score/newml] -- expected is likely best;
### refer to the SNPTEST documentation for more method options.
### If you choose `-method newml`, you must supply the baseline-phenotype to which the other
### discrete phenotypes are compared.
METHOD="expected"
BASELINEPHENOTYPE="control"
# REQUIRED: You can indicate to condition on a (list of) variant(s) [NORMAL/CONDITION]; refer to SNPTEST documentation.
### REQUIRED: Indicate whether to condition on a (list of) variant(s) [NORMAL/CONDITION]; refer to the SNPTEST documentation.
CONDITION="NORMAL"
# CONDITIONLIST="${PROJECTDIR}/conditionvariants.rs2521501.txt"
CONDITIONLIST="${PROJECTDIR}/conditionvariants.rs17514846.txt"

# REQUIRED: For GWAS -- make PLINK/this work with VCF files NEW VERSION
### REQUIRED: For GWAS -- make PLINK/this work with VCF files NEW VERSION
CLUMP_P2="1"
CLUMP_P1="0.000005" # should be of the form 0.005 rather than 5e-3
CLUMP_R2="0.2"
CLUMP_KB="500"
CLUMP_FIELD="P"

# REQUIRED: For regional analysis -- handle this via a file! NEW VERSION
### REQUIRED: For regional analysis -- handle this via a file! NEW VERSION
CHR="1" # e.g. 1
REGION_START="154376264" # e.g. 154376264
REGION_END="154476264" # e.g. 154476264

# REQUIRED: For per-gene analysis
### REQUIRED: For per-gene analysis
GENES_FILE="${PROJECTDIR}/genelist.txt"

# REQUIRED: For GWAS/REGION/GENE analysis
### REQUIRED: For GWAS/REGION/GENE analysis
RANGE="500000" # 500000=500kb, needed for GWAS (LocusZoom plots); and GENE analyses (analysis and LocusZoom plots)

# REQUIRED: Filter settings -- specifically, GWAS, GENE and REGIONAL analyses
### REQUIRED: Filter settings -- specifically, GWAS, GENE and REGIONAL analyses
INFO="0.3"
MAC="6"
CAF="0.005"
Expand All @@ -274,12 +207,12 @@ VARIANTID="2" # this can handle by parseTable! NEW VERSION
PVALUE="17" # this can be handled by parseTable! NEW VERSION
### RANGELZ is RANGE divided by 1000 (integer division via expr); with RANGE="500000" this gives 500, i.e. the range in kb.
RANGELZ=$(expr "$RANGE" / 1000) # move this to the locuszoom-script! NEW VERSION

# REQUIRED: References -- these will be created upon installation
# You can choose one of these options [1kGp3v5GoNL5/1kGp1v3/GoNL4].
### REQUIRED: References -- these will be created upon installation
### You can choose one of these options [1kGp3v5GoNL5/1kGp1v3/GoNL4].
REFERENCE="1kGp3v5GoNL5"
REFERENCEDATA="${GWASTOOLKITDIR}/RESOURCES/1000Gp3v5_EUR/1000Gp3v5.20130502.EUR"
# You can choose one of these:
# - refSeq based: refseq_GRCh37_hg19_Feb2009.txt.gz
# - GENCODE based: gencode_v19_GRCh37_hg19_Feb2009.txt.gz
# - PLINK-style gene list: glist-hg19.gz
### You can choose one of these:
### - refSeq based: refseq_GRCh37_hg19_Feb2009.txt.gz
### - GENCODE based: gencode_v19_GRCh37_hg19_Feb2009.txt.gz
### - PLINK-style gene list: glist-hg19.gz
HG19_GENES="${GWASTOOLKITDIR}/RESOURCES/glist-hg19.gz"

0 comments on commit cd5445e

Please sign in to comment.