From 25919b7a23d51acf3bb6bae70833bb18e2fa84df Mon Sep 17 00:00:00 2001 From: Danilo Horta Date: Thu, 15 Feb 2024 17:18:25 +0000 Subject: [PATCH] save hit_start hit_stop --- c-core/CMakeLists.txt | 2 +- c-core/product.c | 2 ++ c-core/product_line.c | 3 +++ c-core/product_line.h | 5 +++++ c-core/product_thread.c | 2 ++ c-core/test_massive.c | 2 +- c-core/test_scan.c | 4 ++-- c-core/test_window.c | 2 +- c-core/thread.c | 13 ++++++++----- 9 files changed, 25 insertions(+), 10 deletions(-) diff --git a/c-core/CMakeLists.txt b/c-core/CMakeLists.txt index 63c8a40d..6980f0fd 100644 --- a/c-core/CMakeLists.txt +++ b/c-core/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.20.2 FATAL_ERROR) -project(deciphon VERSION 0.17.2 LANGUAGES C) +project(deciphon VERSION 0.17.3 LANGUAGES C) include(cmake/warnings.cmake) include(cmake/sanitizers.cmake) diff --git a/c-core/product.c b/c-core/product.c index 85b4b6c2..fdcb010b 100644 --- a/c-core/product.c +++ b/c-core/product.c @@ -64,6 +64,8 @@ int product_close(struct product *x) ok &= fputs("window\t", fp) >= 0; ok &= fputs("window_start\t", fp) >= 0; ok &= fputs("window_stop\t", fp) >= 0; + ok &= fputs("hit_start\t", fp) >= 0; + ok &= fputs("hit_stop\t", fp) >= 0; ok &= fputs("profile\t", fp) >= 0; ok &= fputs("abc\t", fp) >= 0; ok &= fputs("lrt\t", fp) >= 0; diff --git a/c-core/product_line.c b/c-core/product_line.c index a72307a7..11f30e01 100644 --- a/c-core/product_line.c +++ b/c-core/product_line.c @@ -13,6 +13,9 @@ void product_line_init(struct product_line *x) x->window_start = 0; x->window_stop = 0; + x->hit_start = 0; + x->hit_stop = 0; + memset(x->protein, 0, sizeof_field(struct product_line, protein)); memset(x->abc, 0, sizeof_field(struct product_line, abc)); diff --git a/c-core/product_line.h b/c-core/product_line.h index c89b6fe8..32bf9289 100644 --- a/c-core/product_line.h +++ b/c-core/product_line.h @@ -6,9 +6,14 @@ struct product_line long sequence; int window; + // [window_start, window_stop) int window_start; int window_stop; + // [hit_start, hit_stop) + int hit_start; + int hit_stop; + char protein[64]; char abc[16]; diff --git a/c-core/product_thread.c b/c-core/product_thread.c index 42d58ddf..df7660a1 100644 --- a/c-core/product_thread.c +++ b/c-core/product_thread.c @@ -38,6 +38,8 @@ int product_thread_put_match(struct product_thread *x, struct match *match, if (fprintf(fp, "%d\t", line->window) < 0) defer_error(DCP_EWRITEPROD); if (fprintf(fp, "%d\t", line->window_start) < 0) defer_error(DCP_EWRITEPROD); if (fprintf(fp, "%d\t", line->window_stop) < 0) defer_error(DCP_EWRITEPROD); + if (fprintf(fp, "%d\t", line->hit_start) < 0) defer_error(DCP_EWRITEPROD); + if (fprintf(fp, "%d\t", line->hit_stop) < 0) defer_error(DCP_EWRITEPROD); if (fprintf(fp, "%s\t", line->protein) < 0) defer_error(DCP_EWRITEPROD); if (fprintf(fp, "%s\t", line->abc) < 0) defer_error(DCP_EWRITEPROD); if (fprintf(fp, "%.1f\t", line->lrt) < 0) defer_error(DCP_EWRITEPROD); diff --git a/c-core/test_massive.c b/c-core/test_massive.c index 62f650b2..5c22ae1a 100644 --- a/c-core/test_massive.c +++ b/c-core/test_massive.c @@ -31,7 +31,7 @@ int main(void) } eq(scan_run(scan, PRODDIR, NULL, NULL), 0); eq(scan_progress(scan), 100); - eq(chksum(PRODDIR "/products.tsv"), 27703); + eq(chksum(PRODDIR "/products.tsv"), 61836); eq(scan_close(scan), 0); scan_del(scan); diff --git a/c-core/test_scan.c b/c-core/test_scan.c index c009b5c1..c3b70a4e 100644 --- a/c-core/test_scan.c +++ b/c-core/test_scan.c @@ -15,8 +15,8 @@ static struct params params_list[] = { {1, false, false}, {1, false, false}, {1, false, true}, {1, false, true}, {1, true, false}, {1, true, false}, {1, true, true}, {1, true, true}}; static bool dial_list[] = {true, false, true, false, true, false, true, false}; -static long chksum_list[] = {55932, 55932, 35953, 35953, - 4940, 4940, 4940, 4940}; +static long chksum_list[] = {10096, 10096, 27848, 27848, + 44180, 44180, 44180, 44180}; static void test_invalid_sequence(); static void test_normal_scan(void); diff --git a/c-core/test_window.c b/c-core/test_window.c index 1a730a43..00175fa9 100644 --- a/c-core/test_window.c +++ b/c-core/test_window.c @@ -40,7 +40,7 @@ int main(void) eq(scan_add(scan, sequences[0].id, sequences[0].name, seq), 0); eq(scan_run(scan, PRODDIR, NULL, NULL), 0); eq(scan_progress(scan), 100); - eq(chksum(PRODDIR "/products.tsv"), 25112); + eq(chksum(PRODDIR "/products.tsv"), 38063); eq(scan_close(scan), 0); scan_del(scan); diff --git a/c-core/thread.c b/c-core/thread.c index c596d0db..1f611ab5 100644 --- a/c-core/thread.c +++ b/c-core/thread.c @@ -134,7 +134,7 @@ static int code_fn(int pos, int len, void *arg) } static int trim_path(struct protein *, struct imm_seq const *, - struct imm_path *, struct imm_seq *); + struct imm_path *, struct imm_seq *, int *seqstart); static int process_window(struct thread *x, int protein_idx, struct window const *w) @@ -174,8 +174,11 @@ static int process_window(struct thread *x, int protein_idx, subseq = seq->imm.seq; else { - if ((rc = trim_path(&x->protein, &seq->imm.seq, subpath, &subseq))) + line->hit_start = 0; + if ((rc = trim_path(&x->protein, &seq->imm.seq, subpath, &subseq, + &line->hit_start))) return rc; + line->hit_stop = line->hit_start + imm_seq_size(&subseq); } if (hmmer_online(&x->hmmer)) @@ -206,7 +209,7 @@ static int process_window(struct thread *x, int protein_idx, } static int trim_path(struct protein *protein, struct imm_seq const *seq, - struct imm_path *path, struct imm_seq *subseq) + struct imm_path *path, struct imm_seq *subseq, int *seqstart) { int rc = 0; @@ -225,7 +228,7 @@ static int trim_path(struct protein *protein, struct imm_seq const *seq, } if (rc) return rc; start = match_iter_tell(&it) - 1; - int seqstart = match_iter_seqtell(&it) - match.step.seqsize; + *seqstart = match_iter_seqtell(&it) - match.step.seqsize; if ((rc = match_iter_seek(&it, &match, INT_MAX))) return rc; while (!(rc = match_iter_prev(&it, &match))) @@ -239,7 +242,7 @@ static int trim_path(struct protein *protein, struct imm_seq const *seq, match_iter_seek(&it, &match, start); - *subseq = imm_seq_slice(seq, imm_range(seqstart, seqstop)); + *subseq = imm_seq_slice(seq, imm_range(*seqstart, seqstop)); imm_path_cut(path, start, stop - start); return 0;