Skip to content

Commit

Permalink
updated docs and sim12
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt Lyon committed Nov 24, 2021
1 parent 4af91ff commit f7786bf
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 78 deletions.
54 changes: 21 additions & 33 deletions mkdocs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
```shell
./varGWAS

Program to perform vGWAS of trait against variants in the BGEN format
Program to perform GWAS of trait variability against variants in the BGEN format
Usage:
varGWAS C++ v1.0.0 [OPTION...]
varGWAS v1.2.1 [OPTION...]

-v, --variable_file arg Path to phenotype file
-s, --sep arg File separator
Expand All @@ -14,45 +14,19 @@ Usage:
-b, --bgen_file arg Path to BGEN file
-p, --phenotype arg Column name for phenotype
-i, --id arg Column name for genotype identifier
-r, --robust Robust method using median value (Brown-Forsythe)
-m, --maf arg Filter out variants with a MAF below this threshold
-h, --help Print usage
-t, --threads arg Number of threads (default: 8)
-t, --threads arg Number of threads
```

- Unordered categorical variables should be one-hot encoded.
- Do not provide null values in the phenotype file - these should be filtered out.

# Docker
# Covariates

Perform GWAS
In addition to standard covariates, also include the square of continuous/ordinal phenotypes to adjust the variance effect.

```shell
docker run \
-v /Users/ml18692/projects/varGWAS/test/data:/data \
-e SPDLOG_LEVEL=debug \
-it vargwas \
-v /data/phenotypes.csv \
-s , \
-c sex,age,PC.1,PC.2,PC.3,PC.4,PC.5,PC.6,PC.7,PC.8,PC.9,PC.10 \
-o /data/output.txt \
-b /data/genotypes.bgen \
-p Y \
-i S \
-t 1
```

# Unit tests

Run unit tests

```shell
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
make
```

# Simulation
# Simulations

See [README](https://github.com/MRCIEU/varGWAS/blob/master/sim/README.md)

Expand All @@ -65,3 +39,17 @@ the [spdlog](https://github.com/gabime/spdlog#load-log-levels-from-env-variable-
export SPDLOG_LEVEL=debug
./varGWAS
```

# Unit tests

Run unit tests

```shell
# build in debug mode
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
make
# run tests
./bin/varGWAS_test
```
8 changes: 4 additions & 4 deletions sim/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,10 +121,10 @@ for p in $(seq 0 0.5 6); do
sbatch runR.sh sim12.R -p "$p" -i <i>
cd ..
done
head -n1 0/sim12_0.csv > results.csv
cat */sim12_*.csv | grep -v b0_dummy >> results.csv
head -n1 0/sim12_0.csv > results.csv
cat */sim12_*.csv | grep -v b0_dummy >> results.csv
head -n1 0.0/sim12_0_0.csv > results.csv
cat */sim12_*csv | grep -v b1_dummy >> results.csv
head -n1 0.0/sim12_0_1.csv > results.csv
cat */sim12_*csv | grep -v b1_dummy >> results.csv
```

## Sim13 - Adjusting the variance effect for the interaction
Expand Down
4 changes: 2 additions & 2 deletions sim/sim12.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ n_obs <- 10000
n_sim <- 1000
af <- 0.4

# main effect size of X on Y explaining 5% variance
delta <- 0.33
# main effect size of X on Y, presumably chosen to be detectable with ~95% power (was "95% CI" - TODO confirm)
delta <- 0.0525
theta <- delta * opt$phi

# simulate GxE interaction effects and estimate power
Expand Down
4 changes: 2 additions & 2 deletions sim/sim12_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ dev.off()
# coverage
results <- merge(results, v1_mean, "phi")
results <- merge(results, v2_mean, "phi")
names(results)[17] <- "v1_mean"
names(results)[18] <- "v2_mean"
names(results)[15] <- "v1_mean"
names(results)[16] <- "v2_mean"
results$b1_dummy_lci <- results$b1_dummy - (1.96 * results$s1_dummy)
results$b1_dummy_uci <- results$b1_dummy + (1.96 * results$s1_dummy)
results$b2_dummy_lci <- results$b2_dummy - (1.96 * results$s2_dummy)
Expand Down
42 changes: 5 additions & 37 deletions sim/sim1_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,81 +41,49 @@ t <- fread(paste0("t/data/power_t.csv"))
# process data

# N
cpp_bp_n <- calc_power(n, "P.cpp_bp", 200, c("phi", "lambda"))
cpp_bp_n$dist <- "Normal"
cpp_bp_n$method <- "Breusch-Pagan"

cpp_bf_n <- calc_power(n, "P.cpp_bf", 200, c("phi", "lambda"))
cpp_bf_n$dist <- "Normal"
cpp_bf_n$method <- "Brown-Forsythe (LAD)"

osca_levene_n <- calc_power(n, "P.osca_mean", 200, c("phi", "lambda"))
osca_levene_n$dist <- "Normal"
osca_levene_n$method <- "Levene"

osca_bf_n <- calc_power(n, "P.osca_median", 200, c("phi", "lambda"))
osca_bf_n$dist <- "Normal"
osca_bf_n$method <- "Brown-Forsythe"

# Mixed N
cpp_bp_mn <- calc_power(mn, "P.cpp_bp", 200, c("phi", "lambda"))
cpp_bp_mn$dist <- "Mixed normal"
cpp_bp_mn$method <- "Breusch-Pagan"

cpp_bf_mn <- calc_power(mn, "P.cpp_bf", 200, c("phi", "lambda"))
cpp_bf_mn$dist <- "Mixed normal"
cpp_bf_mn$method <- "Brown-Forsythe (LAD)"

osca_levene_mn <- calc_power(mn, "P.osca_mean", 200, c("phi", "lambda"))
osca_levene_mn$dist <- "Mixed normal"
osca_levene_mn$method <- "Levene"

osca_bf_mn <- calc_power(mn, "P.osca_median", 200, c("phi", "lambda"))
osca_bf_mn$dist <- "Mixed normal"
osca_bf_mn$method <- "Brown-Forsythe"

# Lognormal
cpp_bp_l <- calc_power(l, "P.cpp_bp", 200, c("phi", "lambda"))
cpp_bp_l$dist <- "Lognormal"
cpp_bp_l$method <- "Breusch-Pagan"

cpp_bf_l <- calc_power(l, "P.cpp_bf", 200, c("phi", "lambda"))
cpp_bf_l$dist <- "Lognormal"
cpp_bf_l$method <- "Brown-Forsythe (LAD)"

osca_levene_l <- calc_power(l, "P.osca_mean", 200, c("phi", "lambda"))
osca_levene_l$dist <- "Lognormal"
osca_levene_l$method <- "Levene"

osca_bf_l <- calc_power(l, "P.osca_median", 200, c("phi", "lambda"))
osca_bf_l$dist <- "Lognormal"
osca_bf_l$method <- "Brown-Forsythe"

# T-dist
cpp_bp_t <- calc_power(t, "P.cpp_bp", 200, c("phi", "lambda"))
cpp_bp_t$dist <- "T-dist"
cpp_bp_t$method <- "Breusch-Pagan"

cpp_bf_t <- calc_power(t, "P.cpp_bf", 200, c("phi", "lambda"))
cpp_bf_t$dist <- "T-dist"
cpp_bf_t$method <- "Brown-Forsythe (LAD)"

osca_levene_t <- calc_power(t, "P.osca_mean", 200, c("phi", "lambda"))
osca_levene_t$dist <- "T-dist"
osca_levene_t$method <- "Levene"

osca_bf_t <- calc_power(t, "P.osca_median", 200, c("phi", "lambda"))
osca_bf_t$dist <- "T-dist"
osca_bf_t$method <- "Brown-Forsythe"

# combine
results <- rbind(
cpp_bp_n, cpp_bf_n, osca_levene_n, osca_bf_n,
cpp_bp_mn, cpp_bf_mn, osca_levene_mn, osca_bf_mn,
cpp_bp_l, cpp_bf_l, osca_levene_l, osca_bf_l,
cpp_bp_t, cpp_bf_t, osca_levene_t, osca_bf_t
cpp_bf_n, osca_bf_n,
cpp_bf_mn, osca_bf_mn,
cpp_bf_l, osca_bf_l,
cpp_bf_t, osca_bf_t
)
results$method <- factor(results$method, levels = c("Breusch-Pagan", "Brown-Forsythe (LAD)", "Levene", "Brown-Forsythe"))
results$method <- factor(results$method, levels = c("Brown-Forsythe", "Brown-Forsythe (LAD)"))
results$dist <- factor(results$dist, levels = c("Normal", "Mixed normal", "Lognormal", "T-dist"))
results$lambda <- factor(results$lambda, levels = c(1,10,100,1000))

Expand Down
2 changes: 2 additions & 0 deletions sim/sim3.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ p1 <- qqgplot(results, "log", "P.cpp_bf")
p2 <- qqgplot(results, "sqrt", "P.cpp_bf")
p3 <- qqgplot(results, "irnt", "P.cpp_bf")
p4 <- qqgplot(results, "cube_root", "P.cpp_bf")
p5 <- qqgplot(results, "none", "P.cpp_bf")

p <- ggarrange(p1, p2, p3, p4, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2)
pdf("data/trans_t1e_bf.pdf")
Expand All @@ -120,6 +121,7 @@ p1 <- qqgplot(results, "log", "P.osca_median")
p2 <- qqgplot(results, "sqrt", "P.osca_median")
p3 <- qqgplot(results, "irnt", "P.osca_median")
p4 <- qqgplot(results, "cube_root", "P.osca_median")
p5 <- qqgplot(results, "none", "P.osca_median")

p <- ggarrange(p1, p2, p3, p4, labels = c("A", "B", "C", "D"), ncol = 2, nrow = 2)
pdf("data/trans_t1e_osca_median.pdf")
Expand Down

0 comments on commit f7786bf

Please sign in to comment.