Skip to content

Commit

Permalink
remove --no-pansn and report errors better
Browse files Browse the repository at this point in the history
  • Loading branch information
AndreaGuarracino committed Nov 1, 2023
1 parent fa7a6ca commit 24b423c
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 39 deletions.
37 changes: 18 additions & 19 deletions partition-before-pggb
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,17 @@ vcf_spec=false
multiqc=false

# default values
OUTPUT_DIR=$(pwd)
THREADS=$(getconf _NPROCESSORS_ONLN 2>/dev/null || getconf NPROCESSORS_ONLN 2>/dev/null || echo 1)

# general parameters
output_dir=$OUTPUT_DIR
output_dir=false
input_temp_dir=false
input_paf=false
resume=false
threads=$THREADS
poa_threads=0
keep_intermediate_files=false
compress=false
respect_pansn=true
show_version=false
show_help=false

Expand Down Expand Up @@ -118,7 +116,7 @@ fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o i:o:D:a:p:c:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,no-pansn,version -n 'pggb' -- "$@"`
TEMP=`getopt -o i:o:D:a:p:c:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
Expand Down Expand Up @@ -162,7 +160,6 @@ while true ; do
-T|--poa-threads) poa_threads=$2 ; shift 2 ;;
-A|--keep-temp-files) keep_intermediate_files=true ; shift ;;
-Z|--compress) compress=true ; shift ;;
--no-pansn) respect_pansn=false ; shift ;;
--version) show_version=true ; shift ;;
-h|--help) show_help=true ; shift ;;
--) shift ; break ;;
Expand All @@ -182,7 +179,7 @@ fi
# Check input parameters
if [ "$show_help" == true ]; then
padding=$(printf %${#0}s) # prints as many spaces as the length of $0
echo "usage: $0 -i <input-fasta> [options]"
echo "usage: $0 -i <input-fasta> -o <output-dir> [options]"
echo "options:"
echo " [wfmash]"
echo " -i, --input-fasta FILE input FASTA/FASTQ file"
Expand Down Expand Up @@ -248,24 +245,17 @@ if [ "$show_help" == true ]; then
echo " -A, --keep-temp-files keep intermediate graphs"
echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz,"
echo " and variant (.vcf) outputs with bgzip"
echo " --no-pansn Pangenome Sequence Naming (PanSN) not required for sequence names"
echo " --version display the version of pggb"
echo " -h, --help this text"
echo
echo "Use wfmash, seqwish, smoothxg, odgi, gfaffix, and vg to build, project and display a pangenome graph."
exit
elif [ "$input_fasta" = "false" ]; then
>&2 echo "[pggb] ERROR: mandatory argument: -i/--input-fasta"
elif [ "$input_fasta" = "false" ] || [ "$output_dir" = "false" ]; then
>&2 echo "[pggb] ERROR: mandatory argument: -i/--input-fasta and -o/--output-dir"
exit
elif [ ! -f "${input_fasta}.fai" ]; then
echo "[pggb] ERROR: index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'."
exit 1
elif [ "$respect_pansn" = "false" ] && { [ "$n_haps" = "false" ] || [ "$n_haps" -lt 1 ]; }; then
>&2 echo "[pggb] ERROR: when no-pansn is set, -n/--n-haplotypes must be greater than or equal to 1"
exit
elif [ "$respect_pansn" = "false" -a "$vcf_spec" != "false" ]; then
>&2 echo "[pggb] ERROR: -V/--vcf-spec and --no-pansn are incompatible"
exit
elif [ "$n_mappings" -lt 1 ]; then
>&2 echo "[pggb] ERROR: -c/--n-mappings must be greater than or equal to 1"
exit
Expand All @@ -280,10 +270,19 @@ while IFS= read -r line; do
fi
done < <(cut -f 1 "${input_fasta}.fai")
if [ "$pansn_not_respected" != "false" ]; then
if [ "$respect_pansn" = "false" ]; then
>&2 echo "[pggb] warning: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN)."
else
>&2 echo "[pggb] ERROR: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN). Set --pan-sn to disable this requirement."
>&2 echo "[pggb] warning: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN)."

pansn_error=false
if [ "$vcf_spec" != "false" ]; then
pansn_error=true
>&2 echo "[pggb] ERROR: -V/--vcf-spec cannot be used if the Pangenome Sequence Naming (PanSN) is not respected."
fi
if [ "$n_haps" = "false" ] || [ "$n_haps" -lt 1 ]; then
pansn_error=true
>&2 echo "[pggb] ERROR: -n/--n-haplotypes must be greater than or equal to 1 when the Pangenome Sequence Naming (PanSN) is not respected."
fi

if [ $pansn_error = true ]; then
exit
fi
fi
Expand Down
39 changes: 19 additions & 20 deletions pggb
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,17 @@ vcf_spec=false
multiqc=false

# default values
OUTPUT_DIR=$(pwd)
THREADS=$(getconf _NPROCESSORS_ONLN 2>/dev/null || getconf NPROCESSORS_ONLN 2>/dev/null || echo 1)

# general parameters
output_dir=$OUTPUT_DIR
output_dir=false
input_temp_dir=false
input_paf=false
resume=false
threads=$THREADS
poa_threads=0
keep_intermediate_files=false
compress=false
respect_pansn=true
show_version=false
show_help=false

Expand Down Expand Up @@ -118,7 +116,7 @@ fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o i:o:D:a:p:c:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,no-pansn,version -n 'pggb' -- "$@"`
TEMP=`getopt -o i:o:D:a:p:c:s:l:K:F:k:x:f:B:Xn:j:P:O:Me:t:T:vhASY:G:Q:d:I:R:NbrmZzV: --long input-fasta:,output-dir:,temp-dir:,input-paf:,map-pct-id:,n-mappings:,segment-length:,block-length-min:,mash-kmer:,mash-kmer-thres:,min-match-length:,sparse-map:,sparse-factor:,transclose-batch:,skip-normalization,n-haplotypes:,path-jump-max:,subpath-min:,edge-jump-max:,threads:,poa-threads:,skip-viz,do-layout,help,no-merge-segments,stats,exclude-delim:,poa-length-target:,poa-params:,poa-padding:,run-abpoa,global-poa,write-maf,consensus-spec:,consensus-prefix:,pad-max-depth:,block-id-min:,block-ratio-min:,no-splits,resume,keep-temp-files,multiqc,compress,vcf-spec:,version -n 'pggb' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
Expand Down Expand Up @@ -162,7 +160,6 @@ while true ; do
-T|--poa-threads) poa_threads=$2 ; shift 2 ;;
-A|--keep-temp-files) keep_intermediate_files=true ; shift ;;
-Z|--compress) compress=true ; shift ;;
--no-pansn) respect_pansn=false ; shift ;;
--version) show_version=true ; shift ;;
-h|--help) show_help=true ; shift ;;
--) shift ; break ;;
Expand All @@ -182,7 +179,7 @@ fi
# Check input parameters
if [ "$show_help" == true ]; then
padding=$(printf %${#0}s) # prints as many spaces as the length of $0
echo "usage: $0 -i <input-fasta> [options]"
echo "usage: $0 -i <input-fasta> -o <output-dir> [options]"
echo "options:"
echo " [wfmash]"
echo " -i, --input-fasta FILE input FASTA/FASTQ file"
Expand Down Expand Up @@ -248,24 +245,17 @@ if [ "$show_help" == true ]; then
echo " -A, --keep-temp-files keep intermediate graphs"
echo " -Z, --compress compress alignment (.paf), graph (.gfa, .og), and MSA (.maf) outputs with pigz,"
echo " and variant (.vcf) outputs with bgzip"
echo " --no-pansn Pangenome Sequence Naming (PanSN) not required for sequence names"
echo " --version display the version of pggb"
echo " -h, --help this text"
echo
echo "Use wfmash, seqwish, smoothxg, odgi, gfaffix, and vg to build, project and display a pangenome graph."
exit
elif [ "$input_fasta" = "false" ]; then
>&2 echo "[pggb] ERROR: mandatory argument: -i/--input-fasta"
elif [ "$input_fasta" = "false" ] || [ "$output_dir" = "false" ]; then
>&2 echo "[pggb] ERROR: mandatory argument: -i/--input-fasta and -o/--output-dir"
exit
elif [ ! -f "${input_fasta}.fai" ]; then
echo "[pggb] ERROR: index for $input_fasta does not exist. Please create it using 'samtools faidx $input_fasta'."
exit 1
elif [ "$respect_pansn" = "false" ] && { [ "$n_haps" = "false" ] || [ "$n_haps" -lt 1 ]; }; then
>&2 echo "[pggb] ERROR: when no-pansn is set, -n/--n-haplotypes must be greater than or equal to 1"
exit
elif [ "$respect_pansn" = "false" -a "$vcf_spec" != "false" ]; then
>&2 echo "[pggb] ERROR: -V/--vcf-spec and --no-pansn are incompatible"
exit
elif [ "$n_mappings" -lt 1 ]; then
>&2 echo "[pggb] ERROR: -c/--n-mappings must be greater than or equal to 1"
exit
Expand All @@ -280,10 +270,19 @@ while IFS= read -r line; do
fi
done < <(cut -f 1 "${input_fasta}.fai")
if [ "$pansn_not_respected" != "false" ]; then
if [ "$respect_pansn" = "false" ]; then
>&2 echo "[pggb] warning: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN)."
else
>&2 echo "[pggb] ERROR: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN). Set --pan-sn to disable this requirement."
>&2 echo "[pggb] warning: there are sequence names (like '$pansn_not_respected') that do not match the Pangenome Sequence Naming (PanSN)."

pansn_error=false
if [ "$vcf_spec" != "false" ]; then
pansn_error=true
>&2 echo "[pggb] ERROR: -V/--vcf-spec cannot be used if the Pangenome Sequence Naming (PanSN) is not respected."
fi
if [ "$n_haps" = "false" ] || [ "$n_haps" -lt 1 ]; then
pansn_error=true
>&2 echo "[pggb] ERROR: -n/--n-haplotypes must be greater than or equal to 1 when the Pangenome Sequence Naming (PanSN) is not respected."
fi

if [ $pansn_error = true ]; then
exit
fi
fi
Expand Down Expand Up @@ -478,7 +477,7 @@ reporting:
multiqc: $multiqc
EOT


#-------------------------------------------------------------------------------
echo -e "\nRunning pggb\n" >> "$log_file"

if [[ "$input_paf" == false ]]; then
Expand Down

0 comments on commit 24b423c

Please sign in to comment.