@@ -7,65 +7,30 @@ sink(slog, type = "message")
7
7
library(tidyverse )
8
8
library(cowplot )
9
9
10
- # Get log files and dirs
11
- # These log files are empty but use them to get
12
- # log dirs and find proper log files
13
10
log.files <- snakemake @ input [" log" ]
14
- log.dirs <- unlist(lapply(log.files , dirname ))
15
-
16
- # Get proper log files (log file starts with pipeline- and ends with .log)
17
- log.files <- list.files(log.dirs ,
18
- pattern = " pipeline-.*.log" ,
19
- full.names = TRUE )
20
-
21
- # Get base dir for each log file
22
- log.dirs <- dirname(log.files )
23
-
24
- if (length(log.dirs ) == length(log.files )) {
25
- proper.log.files <- log.files
26
- } else {
27
- # Check for each basedir if there is just one log file
28
- proper.log.files <- list ()
29
- for (i in seq_along(log.dirs )) {
30
- # Get log files in basedir
31
- tmp <- list.files(log.dirs [[i ]],
32
- pattern = " pipeline-.*.log" ,
33
- full.names = TRUE )
34
-
35
- # If there is more than one log file, select newest
36
- if (length(tmp ) > 1 ) {
37
- proper.log.files [[i ]] <- tmp [which.max(file.info(tmp )$ mtime )]
38
- }
39
- }
40
- }
41
11
42
12
# Data frame to store mapping rates of all experiments
43
13
mapping.rates.all <- data.frame (sample = character (),
44
14
overall_mapping_rate = numeric ())
45
15
46
16
# Extract mappings rates from each log file (one log file per experiment)
47
- for (i in seq_along(proper. log.files )) {
17
+ for (i in seq_along(log.files )) {
48
18
# Get dir name (dir = experiment)
49
- dir <- basename(log.dirs [[i ]])
50
-
51
- # Read part of log file that contains mapping data
52
- log.section <- system(paste(" sed '/Reading data files/,/Reading GATC file/!d'" , log.files [[i ]]),
53
- intern = TRUE )
19
+ dir <- basename(dirname(log.files [[i ]]))
54
20
55
21
# Get sample names and add dir name
56
- sample.names <- log.section [grepl(" Now working on " , log.section )]
57
- sample.names <- str_replace(sample.names , " Now working on " , " " )
58
- sample.names <- paste0(dir , " _" , str_replace(sample.names , " ..." , " " ))
22
+ sample <- str_replace(paste0(dir , " _" ,basename(log.files [[i ]])), " .log" , " " )
59
23
60
- # Get line numbers where overall mapping rate is printed
61
- rate.lines <- grep( " % overall alignment rate " , log.section )
24
+ # Read log file
25
+ log <- readLines( con = log.files [ i ] )
62
26
63
- # Extract mapping rates
64
- rates <- as.numeric(str_extract(log.section [rate.lines ], " \\ d+\\ .\\ d+" ))
27
+ # Extract mapping rate
28
+ rate <- log [grepl(" % overall alignment rate" , log )]
29
+ rate <- as.numeric(str_replace(rate , " % overall alignment rate" , " " ))
65
30
66
31
# Extract mapping rates
67
- mapping.rates <- data.frame (sample = sample.names ,
68
- overall_mapping_rate = rates )
32
+ mapping.rates <- data.frame (sample = sample ,
33
+ overall_mapping_rate = rate )
69
34
70
35
# Add to data frame with all data
71
36
mapping.rates.all <- rbind(mapping.rates.all , mapping.rates )
@@ -78,7 +43,7 @@ p <- ggplot(mapping.rates.all,
78
43
geom_bar(stat = " identity" ,
79
44
position = " dodge" ,
80
45
colour = " black" ,
81
- fill = " aquamarine4 " ) +
46
+ fill = " #419179 " ) +
82
47
theme_cowplot(18 ) +
83
48
theme(plot.margin = margin(t = 0.5 ,
84
49
r = 1.5 ,
0 commit comments