Skip to content

Commit

Permalink
Merge pull request #27 from SorenKarst/develop
Browse files Browse the repository at this point in the history
Fix racon bug and integrate UMI filtering in pipeline
  • Loading branch information
SorenKarst authored Feb 9, 2020
2 parents 9ff8860 + 327c79f commit ccfd11f
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 108 deletions.
39 changes: 24 additions & 15 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,7 @@ Tools:
For help with a specific tool or pipeline:
longread_umi <name> -h
```



```
Expand All @@ -227,7 +227,7 @@ where:
-n Process n number of bins. If not defined all bins
are processed.
```



```
Expand Down Expand Up @@ -257,7 +257,7 @@ where:
-n Barcode numbers used. [Default = '1-120'].
-t Number of threads used.
```



```
Expand Down Expand Up @@ -292,7 +292,7 @@ where:
sequences. Default 2.
-t Number of threads used.
```



```
Expand Down Expand Up @@ -331,7 +331,7 @@ where:
-T Number of medaka jobs to start. Threads per job is threads/jobs.
[Default = 1].
```



```
Expand Down Expand Up @@ -379,7 +379,7 @@ longread_umi nanopore_settings_test
-y 3
-n 1000
```



```
Expand Down Expand Up @@ -413,7 +413,7 @@ where:
-u Directory with UMI binned reads.
-t Number of threads to use.
```



```
Expand All @@ -436,7 +436,7 @@ where:
are processed.
-t Number of Medaka jobs to run. [Default = 1].
```



```
Expand All @@ -461,7 +461,7 @@ where:
-e Length of terminal end to search for primers. [Default = 500]
-n Subset reads before search. [Default = 100000]
```



```
Expand Down Expand Up @@ -503,7 +503,7 @@ wget https://www.arb-silva.de/fileadmin/silva_databases/
release_132/Exports/SILVA_132_SSURef_Nr99_tax_silva.fasta.gz
gunzip SILVA_132_SSURef_Nr99_tax_silva.fasta.gz
```



```
Expand All @@ -527,7 +527,7 @@ where:
-t Number of threads to use.
-l Log directory
```



```
Expand All @@ -537,7 +537,8 @@ where:
adaptor regions.
usage: umi_binning [-h] (-d file -o dir -m value -M value )
(-s value -e value -f string -F string -r string -R string -p -t value)
(-s value -e value -f string -F string -r string -R string -p )
(-u value -U value -O value -S value -t value)
where:
-h Show this help text.
Expand All @@ -551,10 +552,18 @@ where:
-F Forward primer sequence.
-r Reverse adaptor sequence.
-R Reverse primer sequence.
-p Flag to disable Nanopore trimming and filtering. Use with PacBio reads.
-p Flag to disable Nanopore trimming and filtering.
Use with PacBio reads.
-u Discard bins with a mean UMI match error above u.
-U Discard bins with a UMI match error standard
deviation above U.
-O Normalize read orientation fraction to 'O' if < 'O' reads are
either +/- strand orientation.
-N Max number of reads with +/- orientation. [Default = 10000]
-S UMI bin size/UMI cluster size cutoff. [Default = 10]
-t Number of threads to use.
```



```
Expand All @@ -576,7 +585,7 @@ where:
-t Number of threads to use. [Default = 1]
-b Debug flag. Keep temp files. [Default = NO]
```




Expand Down
96 changes: 46 additions & 50 deletions docs/ONT_R10_ZYMO_rRNA.html

Large diffs are not rendered by default.

68 changes: 32 additions & 36 deletions docs/ONT_R10_ZYMO_rRNA.rmd
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

* Flowcell: R10
* Instrument: MinION
* Basecalling: guppy v3.4.4
* Basecalling: guppy v3.4.4 with HAC model

### Generate UMI consensus sequences

Expand All @@ -46,7 +46,7 @@ longread_umi nanopore_pipeline \
-R CGACATCGAGGTGCCAAAC \
-c 2 \
-p 2 \
-t 40 \
-t 10 \
-T 1
```

Expand All @@ -58,7 +58,7 @@ longread_umi qc_pipeline \
-r "zymo_curated" \
-u umi_out \
-o umi_out/qc \
-t 40
-t 10
```

## Validate UMI consensus sequences
Expand Down Expand Up @@ -97,15 +97,11 @@ load("validation/ONT_R10_ZYMO_rRNA_ep.Rdata")
```


Perform UMI bin post-processing filtering (will be incorporated in the pipeline in the future).
Remove UMI sequences without primers in both ends
```{r echo=TRUE, message=FALSE, warning=FALSE}
qc <- filter(
qc,
ror > 10^-0.6, # remove bins with extreme read orientation ratios
ror < 10^0.6,
umi_bin_size/umi_cluster_size < 10, # remove bins with extreme bin to cluster size
umi_match_error < 3.5, # remove bins with high mismatch between UMI reference and reads
!is.na(length) # remove umi bins where >=1 gene specific primers was not found in consensus sequence.
!is.na(length) # remove UMI bins where one or both gene-specific primers were not found in the consensus sequence.
)
ep <- filter(
Expand Down Expand Up @@ -170,38 +166,38 @@ Flag contamination and chimeras. Flagged chimeras and contamination below match
```{r echo=TRUE, message=FALSE, warning=FALSE}
cont <- tibble(
umi = c(
"umi47838bins",
"umi177241bins",
"umi13630bins",
"umi173605bins",
"umi159967bins",
"umi185039bins",
"umi181256bins",
"umi6705bins",
"umi42968bins",
"umi238547bins",
"umi199820bins",
"umi151265bins",
"umi6094bins",
"umi193827bins",
"umi181472bins",
"umi198347bins",
"umi234765bins",
"umi180359bins",
"umi180080bins",
"umi17145bins",
"umi189949bins"
),
"umi191103bins",
"umi111976bins",
"umi217947bins",
"umi185324bins",
"umi191976bins",
"umi44490bins",
"umi14613bins",
"umi181378bins",
"umi6637bins",
"umi151082bins",
"umi35088bins",
"umi173219bins",
"umi163928bins",
"umi181158bins",
"umi6090bins",
"umi233806bins",
"umi19568bins",
"umi187615bins",
"umi185965bins",
"umi171705bins",
"umi171734bins"
),
flag = "contamination"
)
chi <- tibble(
umi = c(
"umi176051bins",
"umi12507bins",
"umi183450bins",
"umi146629bins"
),
"umi12455bins",
"umi178211bins",
"umi158224bins",
"umi176163bins"
),
flag = "chimera"
)
```
Expand Down
Binary file modified docs/rdata/ONT_R10_ZYMO_rRNA_ep.Rdata
Binary file not shown.
Binary file modified docs/rdata/ONT_R10_ZYMO_rRNA_qc.Rdata
Binary file not shown.
13 changes: 6 additions & 7 deletions scripts/validation_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,8 @@ lu_compile_qc <- function(
dplyr::transmute(umi = gsub(";.*", "", umi),
ref_error = error,
ref_tax = target)

if(!is.null(ref_ssu_sam)){
if(!is.null(silva)){
ref_ssu_error <- readr::read_delim(
file = paste(data_dir, "/", ref_ssu_sam, sep = ""),
delim = "\t",
Expand All @@ -171,9 +171,7 @@ lu_compile_qc <- function(
dplyr::transmute(umi = gsub(";.*", "", umi),
ref_ssu_error = error,
ref_ssu_tax = target)
}

if(!is.null(silva)){

silva_error <- readr::read_delim(
file = paste(data_dir, "/", silva_ssu_sam, sep = ""),
delim = "\t",
Expand Down Expand Up @@ -248,7 +246,7 @@ lu_compile_qc <- function(
qc <- umi_stats %>%
left_join(con_length, by = "umi") %>%
left_join(ref_error, by = "umi") %>%
{if(!is.null(ref_ssu_sam))left_join(., ref_ssu_error, by = "umi") else .} %>%
{if(!is.null(silva))left_join(., ref_ssu_error, by = "umi") else .} %>%
left_join(chimera, by = "umi") %>%
{if(!is.null(silva))left_join(., silva, by = "umi") else .} %>%
{if(!is.null(read_orientation))left_join(., ror, by = "umi") else .}
Expand Down Expand Up @@ -1103,6 +1101,7 @@ lu_errortype_plot <- function(
lu_errortype_plot_tbl <- function(
profile,
break_size = 5,
digits = 3,
title){

# Summarise based on error type
Expand Down Expand Up @@ -1143,7 +1142,7 @@ lu_errortype_plot_tbl <- function(
# Format decimals
mutate_at(
vars(all_err:del_err),
~round(., 3)
~round(., digits)
)
colnames(errortype_tbl) <- c(
"UMI bin size",
Expand Down

0 comments on commit ccfd11f

Please sign in to comment.