Skip to content

Commit 97b82ac

Browse files
committed
updated for bowtie2 logs for each sample
1 parent 455ed3c commit 97b82ac

File tree

1 file changed

+11
-46
lines changed

1 file changed

+11
-46
lines changed

workflow/scripts/plot_mapping_rates.R

+11-46
Original file line numberDiff line numberDiff line change
@@ -7,65 +7,30 @@ sink(slog, type = "message")
77
library(tidyverse)
88
library(cowplot)
99

10-
# Get log files and dirs
11-
# These log files are empty but use them to get
12-
# log dirs and find proper log files
1310
log.files <- snakemake@input[["log"]]  # [[ ]] returns the character vector of log paths; [ ] would return a one-element list
14-
log.dirs <- unlist(lapply(log.files, dirname))
15-
16-
# Get proper log files (log file starts with pipeline- and ends with .log)
17-
log.files <- list.files(log.dirs,
18-
pattern = "pipeline-.*.log",
19-
full.names = TRUE)
20-
21-
# Get base dir for each log file
22-
log.dirs <- dirname(log.files)
23-
24-
if (length(log.dirs) == length(log.files)) {
25-
proper.log.files <- log.files
26-
} else {
27-
# Check for each basedir if there is just one log file
28-
proper.log.files <- list()
29-
for (i in seq_along(log.dirs)) {
30-
# Get log files in basedir
31-
tmp <- list.files(log.dirs[[i]],
32-
pattern = "pipeline-.*.log",
33-
full.names = TRUE)
34-
35-
# If there is more than one log file, select newest
36-
if (length(tmp) > 1) {
37-
proper.log.files[[i]] <- tmp[which.max(file.info(tmp)$mtime)]
38-
}
39-
}
40-
}
4111

4212
# Data frame to store mapping rates of all experiments
4313
mapping.rates.all <- data.frame(sample = character(),
4414
overall_mapping_rate = numeric())
4515

4616
# Extract mapping rates from each log file (one log file per experiment)
47-
for (i in seq_along(proper.log.files)) {
17+
for (i in seq_along(log.files)) {
4818
# Get dir name (dir = experiment)
49-
dir <- basename(log.dirs[[i]])
50-
51-
# Read part of log file that contains mapping data
52-
log.section <- system(paste("sed '/Reading data files/,/Reading GATC file/!d'", log.files[[i]]),
53-
intern = TRUE)
19+
dir <- basename(dirname(log.files[[i]]))
5420

5521
# Get sample names and add dir name
56-
sample.names <- log.section[grepl("Now working on ", log.section)]
57-
sample.names <- str_replace(sample.names, "Now working on ", "")
58-
sample.names <- paste0(dir, "_", str_replace(sample.names, " ...", ""))
22+
sample <- str_replace(paste0(dir, "_", basename(log.files[[i]])), "\\.log$", "")  # escaped and anchored: strip only the trailing ".log" extension
5923

60-
# Get line numbers where overall mapping rate is printed
61-
rate.lines <- grep("% overall alignment rate", log.section)
24+
# Read log file
25+
log <- readLines(con = log.files[[i]])  # [[ ]] extracts the path itself, consistent with the other uses of log.files in this loop
6226

63-
# Extract mapping rates
64-
rates <- as.numeric(str_extract(log.section[rate.lines], "\\d+\\.\\d+"))
27+
# Extract mapping rate
28+
rate <- log[grepl("% overall alignment rate", log)]
29+
rate <- as.numeric(str_replace(rate, "% overall alignment rate", ""))
6530

6631
# Store sample name and mapping rate in a data frame
67-
mapping.rates <- data.frame(sample = sample.names,
68-
overall_mapping_rate = rates)
32+
mapping.rates <- data.frame(sample = sample,
33+
overall_mapping_rate = rate)
6934

7035
# Add to data frame with all data
7136
mapping.rates.all <- rbind(mapping.rates.all, mapping.rates)
@@ -78,7 +43,7 @@ p <- ggplot(mapping.rates.all,
7843
geom_bar(stat = "identity",
7944
position = "dodge",
8045
colour = "black",
81-
fill = "aquamarine4") +
46+
fill = "#419179") +
8247
theme_cowplot(18) +
8348
theme(plot.margin = margin(t = 0.5,
8449
r = 1.5,

0 commit comments

Comments
 (0)