From d00939821a889586b8d0bf79c28f99b7d05f18e3 Mon Sep 17 00:00:00 2001 From: mpmeers Date: Tue, 19 Nov 2019 00:52:55 -0500 Subject: [PATCH 1/7] Update SEACR_1.2.R Add a check for the length of the d[pctremain2(d) > 1] term at line 128 in order to ensure that assignment of d0 does not fail --- SEACR_1.2.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/SEACR_1.2.R b/SEACR_1.2.R index 9648f81..ce11aab 100644 --- a/SEACR_1.2.R +++ b/SEACR_1.2.R @@ -125,7 +125,11 @@ if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold both2<-c(expmax,ctrlmax) d<-sort(unique(both2)) pctremain2<-function(x) 1-(ecdf(expmax)(x)-ecdf(ctrlmax)(x)) - d0<-min(d[pctremain2(d) > 1]) + if(length(d[pctremain2(d) > 1]) > 0){ + d0<-min(d[pctremain2(d) > 1]) + }else{ + d0<-1 + } invis <- gc(verbose=FALSE) fdr<-c(1-pctremain(x0[1]), 1-pctremain(z0[1])) ## New for SEACR_1.1 }else{ ## If 2nd field is numeric, calculate percentile threshold From 80cfc8a4f64800dd9aed509b4dc52625c08b208d Mon Sep 17 00:00:00 2001 From: mpmeers Date: Wed, 8 Jan 2020 11:21:08 -0800 Subject: [PATCH 2/7] Update README.md Updated description for v1.3 --- README.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index b2e19eb..fc7eae1 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,13 @@ A web interface for SEACR analysis can be found at https://seacr.fredhutch.org ## Recent changes +### v1.3 + +- Fixed a bug in which the bedgraph line thresholding added in v1.2 was failing for some datasets. + ### v1.2 -- Fixed a bug in lines 166 and 168 in which misplaced brackets caused the misreporting of the max signal region terminal coordinate for merged signal blocks +- Fixed a bug in lines 166 and 168 in which misplaced brackets caused the misreporting of the max signal region terminal coordinate for merged signal blocks. - Added a counter to keep track of the number of component bedgraph lines that compose each signal block, and a function to calculate the minimum threshold of lines per signal block at which there is a smaller percentage of target signal blocks remaining than control. This is meant to be used as a filter for signal blocks that pass the total signal threshold despite being composed of very few bedgraph lines, which are unlikely to be true peaks. - Changed how the dataframe for density plotting is truncated (previously a hard-coded 90% cutoff): a dataframe of list quantile (i.e. line #/max line#) vs. value quantile (i.e. value/max value) is derived, and the threshold is selected by finding the dataframe pair for which the orthogonal distance below the line defined by (0,0);(1,1) is maximized. @@ -28,7 +32,7 @@ A web interface for SEACR analysis can be found at https://seacr.fredhutch.org ## Usage: - bash SEACR_1.2.sh experimental bedgraph [control bedgraph | numeric threshold] ["norm" | "non"] ["relaxed" | "stringent"] output prefix + bash SEACR_1.3.sh experimental bedgraph [control bedgraph | numeric threshold] ["norm" | "non"] ["relaxed" | "stringent"] output prefix ## Description of input fields: @@ -76,11 +80,11 @@ Field 6: Region representing the farthest upstream and farthest downstream bases ## Examples: - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph norm stringent output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph norm stringent output Calls enriched regions in target data using normalized IgG control track with stringent threshold - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph non relaxed output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph non relaxed output Calls enriched regions in target data using non-normalized IgG control track with relaxed threshold - bash SEACR_1.2.sh target.bedgraph 0.01 non stringent output + bash SEACR_1.3.sh target.bedgraph 0.01 non stringent output Calls enriched regions in target data by selecting the top 1% of regions by AUC From ec72f6605c659442088b1b71c59d4ae342bd8115 Mon Sep 17 00:00:00 2001 From: mpmeers Date: Wed, 8 Jan 2020 11:23:00 -0800 Subject: [PATCH 3/7] Update SEACR_1.2.sh Updated for SEACR_1.3.sh --- SEACR_1.2.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/SEACR_1.2.sh b/SEACR_1.2.sh index 6651339..f8dac3f 100755 --- a/SEACR_1.2.sh +++ b/SEACR_1.2.sh @@ -7,7 +7,7 @@ then echo " SEACR: Sparse Enrichment Analysis for CUT&RUN - Usage: bash SEACR_1.2.sh .bg [.bg | ] ["norm" | "non"] ["relaxed" | "stringent"] output prefix + Usage: bash SEACR_1.3.sh .bg [.bg | ] ["norm" | "non"] ["relaxed" | "stringent"] output prefix Description of input fields: @@ -42,12 +42,12 @@ then Field 6: Region representing the farthest upstream and farthest downstream bases within the denoted coordinates that are represented by the maximum bedgraph signal Examples: - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph norm stringent output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph norm stringent output Calls enriched regions in target data using normalized IgG control track with stringent threshold - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph non relaxed output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph non relaxed output Calls enriched regions in target data using non-normalized IgG control track with relaxed threshold - bash SEACR_1.2.sh target.bedgraph 0.01 non stringent output + bash SEACR_1.3.sh target.bedgraph 0.01 non stringent output Calls enriched regions in target data by selecting the top 1% of regions by area under the curve (AUC) " exit 1 @@ -119,14 +119,14 @@ path=`dirname $0` if [[ -f $2 ]] && [[ $norm == "norm" ]] then echo "Calculating threshold using normalized control: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password elif [[ -f $2 ]] then echo "Calculating threshold using non-normalized control: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password else echo "Using user-provided threshold: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password fi fdr=`cat $password.fdr.txt | sed -n '1p'` ## Added 5/15/19 for SEACR_1.1 From a31b625c3318c05ec66d897f708da44d24e4fec9 Mon Sep 17 00:00:00 2001 From: mpmeers Date: Wed, 8 Jan 2020 11:23:21 -0800 Subject: [PATCH 4/7] Rename SEACR_1.2.sh to SEACR_1.3.sh --- SEACR_1.2.sh => SEACR_1.3.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename SEACR_1.2.sh => SEACR_1.3.sh (100%) diff --git a/SEACR_1.2.sh b/SEACR_1.3.sh similarity index 100% rename from SEACR_1.2.sh rename to SEACR_1.3.sh From cdeceee274a531067c503c4b8242dc7b1297d13f Mon Sep 17 00:00:00 2001 From: mpmeers Date: Wed, 8 Jan 2020 11:23:39 -0800 Subject: [PATCH 5/7] Rename SEACR_1.2.R to SEACR_1.3.R --- SEACR_1.2.R => SEACR_1.3.R | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename SEACR_1.2.R => SEACR_1.3.R (100%) diff --git a/SEACR_1.2.R b/SEACR_1.3.R similarity index 100% rename from SEACR_1.2.R rename to SEACR_1.3.R From 9269e0ad54a8b9e190a9d302ef82fbd4365e97fd Mon Sep 17 00:00:00 2001 From: mpmeers Date: Thu, 9 Jan 2020 10:17:09 -0800 Subject: [PATCH 6/7] Update SEACR_1.3.sh Added filter in lines 101-103 and 109-111 to remove bed graph entries with zero signal --- SEACR_1.3.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/SEACR_1.3.sh b/SEACR_1.3.sh index f8dac3f..0b08b98 100755 --- a/SEACR_1.3.sh +++ b/SEACR_1.3.sh @@ -98,16 +98,16 @@ fi echo "Creating experimental AUC file: $(date)" -awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}else{if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-"$3 -}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}' $1 > $password.auc.bed +awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){if($4 > 0){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}}else{if($4 > 0){if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-"$3 +}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}}' $1 > $password.auc.bed cut -f 4,7 $password.auc.bed > $password.auc if [[ -f $2 ]] then echo "Creating control AUC file: $(date)" - awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}else{if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-" -$3}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}' $2 > $password2.auc.bed + awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){if($4 > 0){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}}else{if($4 > 0){if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-" +$3}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}}' $2 > $password2.auc.bed cut -f 4,7 $password2.auc.bed > $password2.auc fi From 64529208ed8d320f77e85f72ec36e763bc04f0c4 Mon Sep 17 00:00:00 2001 From: mpmeers Date: Mon, 13 Jan 2020 11:39:10 -0800 Subject: [PATCH 7/7] Update README.md Updated v1.3 changes --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index fc7eae1..8a2382e 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ A web interface for SEACR analysis can be found at https://seacr.fredhutch.org ### v1.3 - Fixed a bug in which the bedgraph line thresholding added in v1.2 was failing for some datasets. +- Added a check to filter out any input bedgraph lines containing zero signal. ### v1.2