diff --git a/VERSION b/VERSION index 437459c..73462a5 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.5.0 +2.5.1 diff --git a/src/alignment-file.c b/src/alignment-file.c index dcdbd49..b4af5a9 100644 --- a/src/alignment-file.c +++ b/src/alignment-file.c @@ -105,7 +105,11 @@ void get_bases_for_each_snp(char filename[], char ** bases_for_snps) gzclose(fp); } -void detect_snps(char filename[], int pure_mode, int output_monomorphic, int output_constant_site_counts) +void detect_snps(char filename[], int pure_mode, int output_monomorphic) { + detect_snps_count_constant_sites(filename, pure_mode, output_monomorphic, NULL); +} + +void detect_snps_count_constant_sites(char filename[], int pure_mode, int output_monomorphic, int* constant_site_counts) { int i; int l; @@ -113,7 +117,6 @@ void detect_snps(char filename[], int pure_mode, int output_monomorphic, int out number_of_samples = 0; length_of_genome = 0; char * first_sequence; - int base_counts[] = {0, 0, 0, 0}; /* array below allows quick mapping of A, C, T and G characters to indices in base_counts array */ const int char_to_base_count_index[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 1, -1, -1, -1, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 3}; @@ -195,15 +198,12 @@ void detect_snps(char filename[], int pure_mode, int output_monomorphic, int out { snp_locations[current_snp_index] = i; current_snp_index++; - } else if (is_pure(first_sequence[i])) { - base_counts[char_to_base_count_index[(int) toupper(first_sequence[i])]]++; + } else if (constant_site_counts != NULL && is_pure(first_sequence[i])) { + constant_site_counts[char_to_base_count_index[(int) toupper(first_sequence[i])]]++; } } - if (output_constant_site_counts) - printf("%d,%d,%d,%d\n", base_counts[0], base_counts[1], base_counts[2], base_counts[3]); - free(first_sequence); kseq_destroy(seq); gzclose(fp); diff --git a/src/alignment-file.h b/src/alignment-file.h index e1be650..19de2b4 100644 --- a/src/alignment-file.h +++ b/src/alignment-file.h @@ -22,7 +22,8 @@ #include "kseq.h" -void detect_snps( char filename[], int pure_mode, int output_monomorphic, int output_constant_site_counts); +void detect_snps( char filename[], int pure_mode, int output_monomorphic); +void detect_snps_count_constant_sites(char filename[], int pure_mode, int output_monomorphic, int *constant_site_counts); void get_bases_for_each_snp(char filename[], char ** bases_for_snps); int is_unknown(char base); int get_length_of_genome(); diff --git a/src/main.c b/src/main.c index 928a791..17305dc 100644 --- a/src/main.c +++ b/src/main.c @@ -24,7 +24,6 @@ #include #include #include -#include "alignment-file.h" #include "snp-sites.h" #include "config.h" @@ -126,7 +125,7 @@ int main (int argc, char **argv) { strncpy(multi_fasta_filename, argv[optind], FILENAME_MAX); if (output_constant_site_counts) { - detect_snps(multi_fasta_filename, pure_mode, output_monomorphic, output_constant_site_counts); + count_constant_sites(multi_fasta_filename, output_filename); } else if( pure_mode || output_monomorphic) { generate_snp_sites_with_ref_pure_mono(multi_fasta_filename, diff --git a/src/snp-sites.c b/src/snp-sites.c index ee66a1a..d2a0640 100644 --- a/src/snp-sites.c +++ b/src/snp-sites.c @@ -38,7 +38,7 @@ static int generate_snp_sites_generic(char filename[], int output_monomorphic) { int i; - detect_snps(filename, pure_mode, output_monomorphic, 0); + detect_snps(filename, pure_mode, output_monomorphic); bases_for_snps = calloc(get_number_of_snps()+1, sizeof(char*)); @@ -141,7 +141,32 @@ int generate_snp_sites_with_ref_pure_mono(char filename[], output_filename, output_reference, pure_mode, output_monomorphic); } +void count_constant_sites(char multi_fasta_filename[], char output_filename[]) { + char cwd[100]; + FILE *input_file; + FILE *output_file; + int *constant_site_counts = NULL; + output_file = (FILE *) fopen(output_filename, "w"); + if (!output_file) { + fprintf(stderr, "ERROR: cannot open %s for writing: %s\n", output_filename, strerror(errno)); + exit(EXIT_FAILURE); + } + + constant_site_counts = (int *) calloc(4, sizeof(int)); + if (constant_site_counts == NULL) { + fprintf(stderr, "ERROR: cannot allocated memory for constant_site_counts"); + exit(EXIT_FAILURE); + } + + detect_snps_count_constant_sites(multi_fasta_filename, 0, 0, constant_site_counts); + + + fprintf(output_file, "%d,%d,%d,%d\n", constant_site_counts[0], constant_site_counts[1], + constant_site_counts[2], constant_site_counts[3]); + fclose(output_file); + free(constant_site_counts); +} // Inefficient void strip_directory_from_filename(char * input_filename, char * output_filename) { diff --git a/src/snp-sites.h b/src/snp-sites.h index 8ba1021..ab24769 100644 --- a/src/snp-sites.h +++ b/src/snp-sites.h @@ -22,6 +22,9 @@ #define _SNP_SITES_H_ #include +#include +#include +#include int generate_snp_sites(char filename[], int output_multi_fasta_file, @@ -42,7 +45,9 @@ int generate_snp_sites_with_ref_pure_mono(char filename[], int output_reference, int pure_mode, int output_monomorphic); - + +void count_constant_sites(char multi_fasta_filename[], char filename[]); + void strip_directory_from_filename(char *input_filename, char *output_filename); diff --git a/tests/check-snp-sites.c b/tests/check-snp-sites.c index e0218fe..809521f 100644 --- a/tests/check-snp-sites.c +++ b/tests/check-snp-sites.c @@ -183,49 +183,49 @@ END_TEST START_TEST (valid_genome_length) { - detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0,0); + detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0); fail_unless( get_length_of_genome() == 2000 ); } END_TEST START_TEST (valid_genome_length_with_multiple_lines_per_sequence) { - detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0); + detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0); fail_unless( get_length_of_genome() == 2000 ); } END_TEST START_TEST (valid_number_of_sequences_in_file) { - detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0,0); + detect_snps("../tests/data/alignment_file_one_line_per_sequence.aln",0,0); fail_unless( get_number_of_samples() == 109 ); } END_TEST START_TEST (valid_number_of_sequences_in_file_with_multiple_lines_per_sequence) { - detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0); + detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0); fail_unless( get_number_of_samples() == 109 ); } END_TEST START_TEST (number_of_snps_detected) { - detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0,0); + detect_snps("../tests/data/alignment_file_multiple_lines_per_sequence.aln",0,0); fail_unless( get_number_of_snps() == 5); } END_TEST START_TEST (number_of_snps_detected_small) { - detect_snps("../tests/data/small_alignment.aln",0,0,0); + detect_snps("../tests/data/small_alignment.aln",0,0); fail_unless( get_number_of_snps() == 1); } END_TEST START_TEST (detect_snps_pure_mode) { - detect_snps("../tests/data/pure_mode_alignment.aln",1,0,0); + detect_snps("../tests/data/pure_mode_alignment.aln",1,0); fail_unless( get_number_of_snps() == 2); } END_TEST @@ -233,14 +233,14 @@ END_TEST START_TEST (detect_snps_pure_mode_monomorphic) { - detect_snps("../tests/data/pure_mode_monomorphic_alignment.aln",1,1,0); + detect_snps("../tests/data/pure_mode_monomorphic_alignment.aln",1,1); fail_unless( get_number_of_snps() == 3); } END_TEST START_TEST (sample_names_from_alignment_file) { - detect_snps("../tests/data/small_alignment.aln",0,0,0); + detect_snps("../tests/data/small_alignment.aln",0,0); char ** current_sequence_names = get_sequence_names(); fail_unless(strcmp(current_sequence_names[0],"reference_sequence") == 0); @@ -269,6 +269,14 @@ START_TEST (check_strip_directory_from_filename_with_directory) } END_TEST +START_TEST (check_count_constant_sites) +{ + count_constant_sites("../tests/data/small_alignment.aln", "small_alignment.constant_site_counts.txt"); + fail_unless(compare_files("../tests/data/small_alignment.constant_site_counts.txt", "small_alignment.constant_site_counts.txt")); + remove("small_alignment.constant_site_counts.txt"); +} +END_TEST + Suite * snp_sites_suite (void) { Suite *s = suite_create ("Creating_SNP_Sites"); @@ -302,7 +310,8 @@ Suite * snp_sites_suite (void) tcase_add_test (tc_snp_sites, valid_phylip_plus_reference); tcase_add_test (tc_snp_sites, valid_alignment_with_pure_mode); tcase_add_test (tc_snp_sites, valid_alignment_with_monomorphic_sites); - + tcase_add_test (tc_snp_sites, check_count_constant_sites); + tcase_add_exit_test(tc_snp_sites, invalid_with_uneven_file_lengths,EXIT_FAILURE); remove("uneven_alignment.aln.snp_sites.aln"); suite_add_tcase (s, tc_snp_sites); diff --git a/tests/data/small_alignment.constant_site_counts.txt b/tests/data/small_alignment.constant_site_counts.txt new file mode 100644 index 0000000..250ccd4 --- /dev/null +++ b/tests/data/small_alignment.constant_site_counts.txt @@ -0,0 +1 @@ +2,2,1,2