diff --git a/README.md b/README.md index b2e19eb..8a2382e 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,14 @@ A web interface for SEACR analysis can be found at https://seacr.fredhutch.org ## Recent changes +### v1.3 + +- Fixed a bug in which the bedgraph line thresholding added in v1.2 was failing for some datasets. +- Added a check to filter out any input bedgraph lines containing zero signal. + ### v1.2 -- Fixed a bug in lines 166 and 168 in which misplaced brackets caused the misreporting of the max signal region terminal coordinate for merged signal blocks +- Fixed a bug in lines 166 and 168 in which misplaced brackets caused the misreporting of the max signal region terminal coordinate for merged signal blocks. - Added a counter to keep track of the number of component bedgraph lines that compose each signal block, and a function to calculate the minimum threshold of lines per signal block at which there is a smaller percentage of target signal blocks remaining than control. This is meant to be used as a filter for signal blocks that pass the total signal threshold despite being composed of very few bedgraph lines, which are unlikely to be true peaks. - Changed how the dataframe for density plotting is truncated (previously a hard-coded 90% cutoff): a dataframe of list quantile (i.e. line #/max line#) vs. value quantile (i.e. value/max value) is derived, and the threshold is selected by finding the dataframe pair for which the orthogonal distance below the line defined by (0,0);(1,1) is maximized. @@ -28,7 +33,7 @@ A web interface for SEACR analysis can be found at https://seacr.fredhutch.org ## Usage: - bash SEACR_1.2.sh experimental bedgraph [control bedgraph | numeric threshold] ["norm" | "non"] ["relaxed" | "stringent"] output prefix + bash SEACR_1.3.sh experimental bedgraph [control bedgraph | numeric threshold] ["norm" | "non"] ["relaxed" | "stringent"] output prefix ## Description of input fields: @@ -76,11 +81,11 @@ Field 6: Region representing the farthest upstream and farthest downstream bases ## Examples: - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph norm stringent output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph norm stringent output Calls enriched regions in target data using normalized IgG control track with stringent threshold - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph non relaxed output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph non relaxed output Calls enriched regions in target data using non-normalized IgG control track with relaxed threshold - bash SEACR_1.2.sh target.bedgraph 0.01 non stringent output + bash SEACR_1.3.sh target.bedgraph 0.01 non stringent output Calls enriched regions in target data by selecting the top 1% of regions by AUC diff --git a/SEACR_1.2.R b/SEACR_1.3.R similarity index 98% rename from SEACR_1.2.R rename to SEACR_1.3.R index 9648f81..ce11aab 100644 --- a/SEACR_1.2.R +++ b/SEACR_1.3.R @@ -125,7 +125,11 @@ if(is.na(numtest)){ ## If 2nd field is a bedgraph, calculate empirical threshold both2<-c(expmax,ctrlmax) d<-sort(unique(both2)) pctremain2<-function(x) 1-(ecdf(expmax)(x)-ecdf(ctrlmax)(x)) - d0<-min(d[pctremain2(d) > 1]) + if(length(d[pctremain2(d) > 1]) > 0){ + d0<-min(d[pctremain2(d) > 1]) + }else{ + d0<-1 + } invis <- gc(verbose=FALSE) fdr<-c(1-pctremain(x0[1]), 1-pctremain(z0[1])) ## New for SEACR_1.1 }else{ ## If 2nd field is numeric, calculate percentile threshold diff --git a/SEACR_1.2.sh b/SEACR_1.3.sh similarity index 87% rename from SEACR_1.2.sh rename to SEACR_1.3.sh index 6651339..0b08b98 100755 --- a/SEACR_1.2.sh +++ b/SEACR_1.3.sh @@ -7,7 +7,7 @@ then echo " SEACR: Sparse Enrichment Analysis for CUT&RUN - Usage: bash SEACR_1.2.sh .bg [.bg | ] ["norm" | "non"] ["relaxed" | "stringent"] output prefix + Usage: bash SEACR_1.3.sh .bg [.bg | ] ["norm" | "non"] ["relaxed" | "stringent"] output prefix Description of input fields: @@ -42,12 +42,12 @@ then Field 6: Region representing the farthest upstream and farthest downstream bases within the denoted coordinates that are represented by the maximum bedgraph signal Examples: - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph norm stringent output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph norm stringent output Calls enriched regions in target data using normalized IgG control track with stringent threshold - bash SEACR_1.2.sh target.bedgraph IgG.bedgraph non relaxed output + bash SEACR_1.3.sh target.bedgraph IgG.bedgraph non relaxed output Calls enriched regions in target data using non-normalized IgG control track with relaxed threshold - bash SEACR_1.2.sh target.bedgraph 0.01 non stringent output + bash SEACR_1.3.sh target.bedgraph 0.01 non stringent output Calls enriched regions in target data by selecting the top 1% of regions by area under the curve (AUC) " exit 1 @@ -98,16 +98,16 @@ fi echo "Creating experimental AUC file: $(date)" -awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}else{if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-"$3 -}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}' $1 > $password.auc.bed +awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){if($4 > 0){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}}else{if($4 > 0){if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-"$3 +}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}}' $1 > $password.auc.bed cut -f 4,7 $password.auc.bed > $password.auc if [[ -f $2 ]] then echo "Creating control AUC file: $(date)" - awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}else{if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-" -$3}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}' $2 > $password2.auc.bed + awk 'BEGIN{s=1}; {if(s==1){s++}else if(s==2){if($4 > 0){chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1; s++}}else{if($4 > 0){if(chr==$1 && $2==stop){num++; stop=$3; auc=auc+($4*($3-$2)); if ($4 > max){max=$4; coord=$1":"$2"-" +$3}else if($4 == max){split(coord,t,"-"); coord=t[1]"-"$3}}else{print chr"\t"start"\t"stop"\t"auc"\t"max"\t"coord"\t"num; chr=$1; start=$2; stop=$3; max=$4; coord=$1":"$2"-"$3; auc=$4*($3-$2); num=1}}}}' $2 > $password2.auc.bed cut -f 4,7 $password2.auc.bed > $password2.auc fi @@ -119,14 +119,14 @@ path=`dirname $0` if [[ -f $2 ]] && [[ $norm == "norm" ]] then echo "Calculating threshold using normalized control: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$password2.auc --norm=yes --output=$password elif [[ -f $2 ]] then echo "Calculating threshold using non-normalized control: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$password2.auc --norm=no --output=$password else echo "Using user-provided threshold: $(date)" - Rscript $path/SEACR_1.2.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password + Rscript $path/SEACR_1.3.R --exp=$password.auc --ctrl=$2 --norm=no --output=$password fi fdr=`cat $password.fdr.txt | sed -n '1p'` ## Added 5/15/19 for SEACR_1.1