Skip to content

Commit

Permalink
Fixes segmentation fault when building SA index (#11), and improves t…
Browse files Browse the repository at this point in the history
…he help display for build index commands
  • Loading branch information
jtarraga committed Jan 29, 2015
1 parent 2b770bb commit 59e9ba3
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 21 deletions.
47 changes: 35 additions & 12 deletions src/build-index/index_builder.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "index_builder.h"


//------------------------------------------------------------------------------------
//------------------------------------------------------------------------------------

Expand Down Expand Up @@ -36,9 +35,11 @@ void index_options_free(index_options_t *options) {
//------------------------------------------------------------------------------------

void** argtable_index_options_new(int mode) {
int num_options = NUM_INDEX_OPTIONS;
int num_options;
if (mode == BWT_INDEX) {
num_options += NUM_INDEX_BWT_OPTIONS;
num_options = NUM_INDEX_BWT_OPTIONS;
} else {
num_options = NUM_INDEX_SA_OPTIONS;
}

// NUM_OPTIONS +1 to allocate end structure
Expand Down Expand Up @@ -98,29 +99,51 @@ index_options_t *read_CLI_index_options(void **argtable, index_options_t *option
//------------------------------------------------------------------------------------

void usage_index(void **argtable, int mode) {
printf("\nUsage:\n\t%s %s <options>\n", HPG_ALIGNER_BIN,
printf("\n");
printf("+===============================================================+\n");
if (mode == BWT_INDEX) {
printf("| HPG-Aligner help for building BWT index |\n");
} else {
printf("| HPG-Aligner help for building SA index |\n");
}
printf("+===============================================================+\n");
printf("Usage:\n");
printf("\t%s %s -g|--ref-genome=<file> -i|--index=<file> [options]\n",
HPG_ALIGNER_BIN,
(mode == BWT_INDEX ? "build-bwt-index" : "build-sa-index"));

//arg_print_syntaxv(stdout, argtable, "\n");
printf("\nOptions:\n");
arg_print_glossary(stdout, argtable, "\t%-50s\t%s\n");

exit(0);
printf("\n");
printf("Mandatory parameters:\n");
printf("\t-g, --ref-genome Reference genome (FASTA format)\n");
printf("\t-i, --index=<file> Index directory name\n");
printf("\n");
printf("Options:\n");
printf("\t-a, --alternative-map=<file> Alternative mapping filename. This two-columns file contains the alternative sequence names with their corresponding chromosome names (only for SA index)\n");
printf("\t-d, --decoy-genome=<file> Decoy genome in FASTA format (only for SA index)\n");
printf("\t-v, --version Display version\n");
printf("\t-h, --help Help option\n");
}

//------------------------------------------------------------------------------------

index_options_t *parse_index_options(int argc, char **argv) {
int mode = SA_INDEX, num_options = NUM_INDEX_OPTIONS;
int mode, num_options;

if (strcmp(argv[0], "build-bwt-index") == 0) {
mode = BWT_INDEX;
num_options += NUM_INDEX_BWT_OPTIONS;
num_options = NUM_INDEX_BWT_OPTIONS;
} else if (strcmp(argv[0], "build-sa-index") == 0) {
mode = SA_INDEX;
}
num_options = NUM_INDEX_SA_OPTIONS;
} else {
fprintf(stdout, "\nErrors:\n");
printf("\tUnknown command: %s\n", argv[0]);
usage_index(NULL, mode);
exit(-1);
}

void **argtable = argtable_index_options_new(mode);

index_options_t *options = index_options_new();
if (argc < 2) {
usage_index(argtable, mode);
Expand Down
4 changes: 2 additions & 2 deletions src/build-index/index_builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
// Index kinds selected by the "build-sa-index" / "build-bwt-index" commands.
#define SA_INDEX 0
#define BWT_INDEX 1

// Option counts: argtable_index_options_new() picks one of the per-mode
// values as num_options. Each macro is defined exactly once; a second
// definition with a different value is an invalid macro redefinition.
// NOTE(review): NUM_INDEX_OPTIONS looks like the pre-split total; confirm
// nothing still uses it before dropping it.
#define NUM_INDEX_OPTIONS 6
#define NUM_INDEX_SA_OPTIONS 6
#define NUM_INDEX_BWT_OPTIONS 5

#define BWT_RATIO_DEFAULT 8

Expand Down
11 changes: 6 additions & 5 deletions src/sa/sa_index3.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,12 @@ char *alt_names_display(alt_names_t *p) {
//--------------------------------------------------------------------------------------

/**
 * Read a genome without an alternative-mapping file.
 *
 * Forwards to read_genome3_alt() with a NULL alt_filename.
 *
 * @param genome_filename  Passed through unchanged to read_genome3_alt().
 * @return Whatever read_genome3_alt() returns.
 */
sa_genome3_t *read_genome3(char *genome_filename) {
  return read_genome3_alt(genome_filename, NULL);
}

//--------------------------------------------------------------------------------------

sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename) {
sa_genome3_t *read_genome3_alt(char *genome_filename, char *alt_filename) {

const int MAX_CHROM_NAME_LENGHT = 1024;
uint reading_name, seq_name_count = 0;
Expand Down Expand Up @@ -289,6 +289,7 @@ sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename) {
// update chromosomes for ALT sequences: chromosomes and flanks
char *chrom_name;
for (size_t i = 0; i < num_seqs; i++) {
seq_chroms[i] = i;
if (seq_flags[i] == ALT_FLAG) {
chrom_name = alt_names_get_chrom_name(seq_names[i], alt_names);
if (chrom_name) {
Expand Down Expand Up @@ -1201,7 +1202,7 @@ void sa_index3_build_k18_alt(char *genome_filename, char *alt_filename,
sprintf(filename_tab, "%s/%s.S", sa_index_dirname, prefix);
printf("\nreading file genome %s...\n", genome_filename);
gettimeofday(&start, NULL);
sa_genome3_t *genome = read_genome3_ex(genome_filename, alt_filename);
sa_genome3_t *genome = read_genome3_alt(genome_filename, alt_filename);
gettimeofday(&stop, NULL);

if (genome->length > MAX_GENOME_LENGTH || genome->num_seqs > MAX_NUM_SEQUENCES) {
Expand All @@ -1215,7 +1216,7 @@ void sa_index3_build_k18_alt(char *genome_filename, char *alt_filename,
exit(-1);
}

//sa_genome3_display(genome);
// sa_genome3_display(genome);

// write S to file
f_tab = fopen(filename_tab, "wb");
Expand Down Expand Up @@ -1525,7 +1526,7 @@ void sa_index3_build_k18_alt(char *genome_filename, char *alt_filename,
fprintf(f_tab, "6. Number of suffixes\n");
fprintf(f_tab, "7. Genome length\n");
fprintf(f_tab, "8. Number of sequencess\n");
fprintf(f_tab, "9. One line per sequence: name, length, type, chrom, start, end, left and right flanks (the last five fields for HAP sequences)\n");
fprintf(f_tab, "9. One line per sequence: name, length, type, chrom, start, end, left and right flanks (the last five fields for ALT sequences)\n");
fclose(f_tab);

sprintf(filename_tab, "%s/index", sa_index_dirname);
Expand Down
9 changes: 7 additions & 2 deletions src/sa/sa_index3.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ typedef struct sa_genome3 {
//--------------------------------------------------------------------------------------

// Genome reader taking only the genome file name; its definition forwards
// to the two-argument reader with a NULL alt_filename.
sa_genome3_t *read_genome3(char *genome_filename);
// NOTE(review): read_genome3_ex and read_genome3_alt have identical
// signatures; read_genome3_ex looks like the pre-rename name. Confirm which
// one is current before relying on either.
sa_genome3_t *read_genome3_ex(char *genome_filename, char *alt_filename);
// Genome reader that also takes an alternative-mapping file name
// (read_genome3() passes NULL for it).
sa_genome3_t *read_genome3_alt(char *genome_filename, char *alt_filename);

//--------------------------------------------------------------------------------------

Expand All @@ -83,6 +83,9 @@ static inline sa_genome3_t *sa_genome3_new(size_t length, size_t num_seqs,
p->num_seqs = num_seqs;
p->seq_lengths = seq_lengths;
p->seq_flags = seq_flags;
p->seq_chroms = seq_chroms;
p->seq_starts = seq_starts;
p->seq_ends = seq_ends;
if (num_seqs && seq_lengths) {
p->seq_offsets = (size_t *) calloc(num_seqs, sizeof(size_t));
size_t offset = 0;
Expand All @@ -105,6 +108,8 @@ static inline sa_genome3_t *sa_genome3_new(size_t length, size_t num_seqs,
size_t flank_size;
char *alt_seq, *chrom_seq;
for (size_t i = 0; i < num_seqs; i++) {
left_flanks[i] = 0;
right_flanks[i] = 0;
if (seq_flags[i] == ALT_FLAG) {
// calculate left flank
flank_size = 0;
Expand Down Expand Up @@ -195,7 +200,7 @@ static inline void sa_genome3_display(sa_genome3_t *p) {
printf("%u\t%s\t%s\t%lu\t%lu\t%lu\t%s\t%lu\t%lu\t%lu\t%lu\n",
i, GET_SEQ_FLAG_NAME(p->seq_flags[i]), p->seq_names[i],
p->seq_lengths[i], p->seq_offsets[i],
p->seq_chroms[i], (p->seq_flags[i] == ALT_FLAG ? p->seq_names[p->seq_chroms[i]] : ""),
p->seq_chroms[i], p->seq_names[p->seq_chroms[i]],
p->seq_starts[i], p->seq_ends[i],
p->left_flanks[i], p->right_flanks[i]);
}
Expand Down

0 comments on commit 59e9ba3

Please sign in to comment.