From 59e9ba3e68df2fa784c49cfa81f8958a95fea804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Joaqu=C3=ADn=20T=C3=A1rraga=20Gim=C3=A9nez?= Date: Thu, 29 Jan 2015 13:11:39 +0100 Subject: [PATCH] Fixes segmentation fault when building SA index (#11), and improves the help display for build index commands --- src/build-index/index_builder.c | 47 ++++++++++++++++++++++++--------- src/build-index/index_builder.h | 4 +-- src/sa/sa_index3.c | 11 ++++---- src/sa/sa_index3.h | 9 +++++-- 4 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/build-index/index_builder.c b/src/build-index/index_builder.c index 868f221..3dc6055 100644 --- a/src/build-index/index_builder.c +++ b/src/build-index/index_builder.c @@ -1,6 +1,5 @@ #include "index_builder.h" - //------------------------------------------------------------------------------------ //------------------------------------------------------------------------------------ @@ -36,9 +35,11 @@ void index_options_free(index_options_t *options) { //------------------------------------------------------------------------------------ void** argtable_index_options_new(int mode) { - int num_options = NUM_INDEX_OPTIONS; + int num_options; if (mode == BWT_INDEX) { - num_options += NUM_INDEX_BWT_OPTIONS; + num_options = NUM_INDEX_BWT_OPTIONS; + } else { + num_options = NUM_INDEX_SA_OPTIONS; } // NUM_OPTIONS +1 to allocate end structure @@ -98,29 +99,51 @@ index_options_t *read_CLI_index_options(void **argtable, index_options_t *option //------------------------------------------------------------------------------------ void usage_index(void **argtable, int mode) { - printf("\nUsage:\n\t%s %s \n", HPG_ALIGNER_BIN, + printf("\n"); + printf("+===============================================================+\n"); + if (mode == BWT_INDEX) { + printf("| HPG-Aligner help for building BWT index |\n"); + } else { + printf("| HPG-Aligner help for building SA index |\n"); + } + 
printf("+===============================================================+\n"); + printf("Usage:\n"); + printf("\t%s %s -g|--ref-genome= -i|--index= [options]\n", + HPG_ALIGNER_BIN, (mode == BWT_INDEX ? "build-bwt-index" : "build-sa-index")); - //arg_print_syntaxv(stdout, argtable, "\n"); - printf("\nOptions:\n"); - arg_print_glossary(stdout, argtable, "\t%-50s\t%s\n"); - - exit(0); + printf("\n"); + printf("Mandatory parameters:\n"); + printf("\t-g, --ref-genome Reference genome (FASTA format)\n"); + printf("\t-i, --index= Index directory name\n"); + printf("\n"); + printf("Options:\n"); + printf("\t-a, --alternative-map= Alternative mapping filename. This two-columns file contains the alternative sequence names with their corresponding chromosome names (only for SA index)\n"); + printf("\t-d, --decoy-genome= Decoy genome in FASTA format (only for SA index)\n"); + printf("\t-v, --version Display version\n"); + printf("\t-h, --help Help option\n"); } //------------------------------------------------------------------------------------ index_options_t *parse_index_options(int argc, char **argv) { - int mode = SA_INDEX, num_options = NUM_INDEX_OPTIONS; + int mode = SA_INDEX, num_options; /* default mode: the unknown-command branch below passes mode to usage_index() before any assignment */ if (strcmp(argv[0], "build-bwt-index") == 0) { mode = BWT_INDEX; - num_options += NUM_INDEX_BWT_OPTIONS; + num_options = NUM_INDEX_BWT_OPTIONS; } else if (strcmp(argv[0], "build-sa-index") == 0) { mode = SA_INDEX; - } + num_options = NUM_INDEX_SA_OPTIONS; + } else { + fprintf(stdout, "\nErrors:\n"); + printf("\tUnknown command: %s\n", argv[0]); + usage_index(NULL, mode); + exit(-1); + } void **argtable = argtable_index_options_new(mode); + index_options_t *options = index_options_new(); if (argc < 2) { usage_index(argtable, mode); diff --git a/src/build-index/index_builder.h b/src/build-index/index_builder.h index 67bc669..c701cd3 100644 --- a/src/build-index/index_builder.h +++ b/src/build-index/index_builder.h @@ -15,8 +15,8 @@ #define SA_INDEX 0 #define BWT_INDEX 1 -#define NUM_INDEX_OPTIONS 
6 -#define NUM_INDEX_BWT_OPTIONS 0 +#define NUM_INDEX_SA_OPTIONS 6 +#define NUM_INDEX_BWT_OPTIONS 5 #define BWT_RATIO_DEFAULT 8 diff --git a/src/sa/sa_index3.c b/src/sa/sa_index3.c index 5ce7d97..ddc7f25 100644 --- a/src/sa/sa_index3.c +++ b/src/sa/sa_index3.c @@ -97,12 +97,12 @@ char *alt_names_display(alt_names_t *p) { //-------------------------------------------------------------------------------------- sa_genome3_t *read_genome3(char *genome_filename) { - return read_genome3_ex(genome_filename, NULL); + return read_genome3_alt(genome_filename, NULL); } //-------------------------------------------------------------------------------------- -sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename) { +sa_genome3_t *read_genome3_alt(char *genome_filename, char *alt_filename) { const int MAX_CHROM_NAME_LENGHT = 1024; uint reading_name, seq_name_count = 0; @@ -289,6 +289,7 @@ sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename) { // update chromosomes for ALT sequences: chromosomes and flanks char *chrom_name; for (size_t i = 0; i < num_seqs; i++) { + seq_chroms[i] = i; if (seq_flags[i] == ALT_FLAG) { chrom_name = alt_names_get_chrom_name(seq_names[i], alt_names); if (chrom_name) { @@ -1201,7 +1202,7 @@ void sa_index3_build_k18_alt(char *genome_filename, char *alt_filename, sprintf(filename_tab, "%s/%s.S", sa_index_dirname, prefix); printf("\nreading file genome %s...\n", genome_filename); gettimeofday(&start, NULL); - sa_genome3_t *genome = read_genome3_ex(genome_filename, alt_filename); + sa_genome3_t *genome = read_genome3_alt(genome_filename, alt_filename); gettimeofday(&stop, NULL); if (genome->length > MAX_GENOME_LENGTH || genome->num_seqs > MAX_NUM_SEQUENCES) { @@ -1215,7 +1216,7 @@ void sa_index3_build_k18_alt(char *genome_filename, char *alt_filename, exit(-1); } - //sa_genome3_display(genome); + // sa_genome3_display(genome); // write S to file f_tab = fopen(filename_tab, "wb"); @@ -1525,7 +1526,7 @@ void 
sa_index3_build_k18_alt(char *genome_filename, char *alt_filename, fprintf(f_tab, "6. Number of suffixes\n"); fprintf(f_tab, "7. Genome length\n"); fprintf(f_tab, "8. Number of sequencess\n"); - fprintf(f_tab, "9. One line per sequence: name, length, type, chrom, start, end, left and right flanks (the last five fields for HAP sequences)\n"); + fprintf(f_tab, "9. One line per sequence: name, length, type, chrom, start, end, left and right flanks (the last five fields for ALT sequences)\n"); fclose(f_tab); sprintf(filename_tab, "%s/index", sa_index_dirname); diff --git a/src/sa/sa_index3.h b/src/sa/sa_index3.h index 763b978..7990b9d 100644 --- a/src/sa/sa_index3.h +++ b/src/sa/sa_index3.h @@ -69,7 +69,7 @@ typedef struct sa_genome3 { //-------------------------------------------------------------------------------------- sa_genome3_t *read_genome3(char *genome_filename); -sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename); +sa_genome3_t *read_genome3_alt(char *genome_filename, char *alt_filename); //-------------------------------------------------------------------------------------- @@ -83,6 +83,9 @@ static inline sa_genome3_t *sa_genome3_new(size_t length, size_t num_seqs, p->num_seqs = num_seqs; p->seq_lengths = seq_lengths; p->seq_flags = seq_flags; + p->seq_chroms = seq_chroms; + p->seq_starts = seq_starts; + p->seq_ends = seq_ends; if (num_seqs && seq_lengths) { p->seq_offsets = (size_t *) calloc(num_seqs, sizeof(size_t)); size_t offset = 0; @@ -105,6 +108,8 @@ static inline sa_genome3_t *sa_genome3_new(size_t length, size_t num_seqs, size_t flank_size; char *alt_seq, *chrom_seq; for (size_t i = 0; i < num_seqs; i++) { + left_flanks[i] = 0; + right_flanks[i] = 0; if (seq_flags[i] == ALT_FLAG) { // calculate left flank flank_size = 0; @@ -195,7 +200,7 @@ static inline void sa_genome3_display(sa_genome3_t *p) { printf("%u\t%s\t%s\t%lu\t%lu\t%lu\t%s\t%lu\t%lu\t%lu\t%lu\n", i, GET_SEQ_FLAG_NAME(p->seq_flags[i]), p->seq_names[i], 
p->seq_lengths[i], p->seq_offsets[i], - p->seq_chroms[i], (p->seq_flags[i] == ALT_FLAG ? p->seq_names[p->seq_chroms[i]] : ""), + p->seq_chroms[i], p->seq_names[p->seq_chroms[i]], p->seq_starts[i], p->seq_ends[i], p->left_flanks[i], p->right_flanks[i]); }