diff --git a/c-core/setup_viterbi.c b/c-core/setup_viterbi.c new file mode 100644 index 0000000..d0a56ef --- /dev/null +++ b/c-core/setup_viterbi.c @@ -0,0 +1,106 @@ +#include "protein.h" +#include "protein_node.h" +#include "vit.h" +#include <math.h> + +/* Special-state transition scores in the form handed to vit_set_extr_trans(); see viterbi_xtrans_init() for how each field is derived. ME and DE are always 0. */ +struct viterbi_xtrans +{ + float const SB; + float const SN; + float const NN; + float const NB; + + float const ET; + float const EC; + float const CC; + float const CT; + + float const EB; + float const EJ; + float const JJ; + float const JB; + + float const ME; + float const DE; +}; + +/* Derives the table from the protein's xtrans: SB = NB, SN = NN, ET = EC + CT, EC = EC + CC, EB = EJ + JB, EJ = EJ + JJ; the remaining fields are copied unchanged. */ +struct viterbi_xtrans viterbi_xtrans_init(struct xtrans x) +{ + return (struct viterbi_xtrans){ + .SB = x.NB, + .SN = x.NN, + .NN = x.NN, + .NB = x.NB, + + .ET = x.EC + x.CT, + .EC = x.EC + x.CC, + .CC = x.CC, + .CT = x.CT, + + .EB = x.EJ + x.JB, + .EJ = x.EJ + x.JJ, + .JJ = x.JJ, + .JB = x.JB, + + .ME = 0.0f, + .DE = 0.0f, + }; +} + +/* Sizes `x` for protein->core_size via vit_setup() and loads the protein's transition and emission scores negated (core transitions at the model boundaries are set to INFINITY). Returns vit_setup()'s nonzero code on failure, 0 otherwise. */ +int setup_viterbi(struct vit *x, struct protein const *protein) +{ + int K = protein->core_size; + int rc = vit_setup(x, K); + if (rc) return rc; + + struct viterbi_xtrans const xt = viterbi_xtrans_init(protein->xtrans); + vit_set_extr_trans(x, EXTR_TRANS_RR, -protein->null.RR); + vit_set_extr_trans(x, EXTR_TRANS_SN, -xt.SN); + vit_set_extr_trans(x, EXTR_TRANS_NN, -xt.NN); + vit_set_extr_trans(x, EXTR_TRANS_SB, -xt.SB); + vit_set_extr_trans(x, EXTR_TRANS_NB, -xt.NB); + vit_set_extr_trans(x, EXTR_TRANS_EB, -xt.EB); + vit_set_extr_trans(x, EXTR_TRANS_JB, -xt.JB); + vit_set_extr_trans(x, EXTR_TRANS_EJ, -xt.EJ); + vit_set_extr_trans(x, EXTR_TRANS_JJ, -xt.JJ); + vit_set_extr_trans(x, EXTR_TRANS_EC, -xt.EC); + vit_set_extr_trans(x, EXTR_TRANS_CC, -xt.CC); + vit_set_extr_trans(x, EXTR_TRANS_ET, -xt.ET); + vit_set_extr_trans(x, EXTR_TRANS_CT, -xt.CT); + + for (int k = 0; k < K; ++k) + { + vit_set_core_trans(x, CORE_TRANS_BM, -protein->BMk[k], k); + } + + vit_set_core_trans(x, CORE_TRANS_MM, INFINITY, 0); + vit_set_core_trans(x, CORE_TRANS_MD, INFINITY, 0); + vit_set_core_trans(x, CORE_TRANS_IM, INFINITY, 0); +
vit_set_core_trans(x, CORE_TRANS_DM, INFINITY, 0); + vit_set_core_trans(x, CORE_TRANS_DD, INFINITY, 0); + for (int k = 0; k < K - 1; ++k) + { + vit_set_core_trans(x, CORE_TRANS_MM, -protein->nodes[k].trans.MM, k + 1); + vit_set_core_trans(x, CORE_TRANS_MI, -protein->nodes[k].trans.MI, k + 0); + vit_set_core_trans(x, CORE_TRANS_MD, -protein->nodes[k].trans.MD, k + 1); + vit_set_core_trans(x, CORE_TRANS_IM, -protein->nodes[k].trans.IM, k + 1); + vit_set_core_trans(x, CORE_TRANS_II, -protein->nodes[k].trans.II, k + 0); + vit_set_core_trans(x, CORE_TRANS_DM, -protein->nodes[k].trans.DM, k + 1); + vit_set_core_trans(x, CORE_TRANS_DD, -protein->nodes[k].trans.DD, k + 1); + } + vit_set_core_trans(x, CORE_TRANS_MI, INFINITY, K - 1); + vit_set_core_trans(x, CORE_TRANS_II, INFINITY, K - 1); + + for (size_t i = 0; i < VITERBI_TABLE_SIZE; ++i) + { + vit_set_null(x, -protein->null.emission[i], i); + vit_set_background(x, -protein->bg.emission[i], i); + + for (int k = 0; k < K; ++k) + vit_set_match(x, -protein->nodes[k].emission[i], k, i); + } + return 0; +} diff --git a/c-core/setup_viterbi.h b/c-core/setup_viterbi.h new file mode 100644 index 0000000..fad3891 --- /dev/null +++ b/c-core/setup_viterbi.h @@ -0,0 +1,9 @@ +#ifndef SETUP_VITERBI_H +#define SETUP_VITERBI_H + +struct vit; +struct protein; + +int setup_viterbi(struct vit *, struct protein const *); + +#endif diff --git a/c-core/test_protein.c b/c-core/test_protein.c index 40f1275..89edee8 100644 --- a/c-core/test_protein.c +++ b/c-core/test_protein.c @@ -5,9 +5,11 @@ #include "imm/nuclt_code.h" #include "imm/path.h" #include "protein.h" +#include "setup_viterbi.h" +#include "state.h" +#include "trellis.h" #include "vendor/minctest.h" -#include "viterbi.h" -#include "viterbi_struct.h" +#include "vit.h" static void test_protein_uniform(void); static void test_protein_occupancy(void); @@ -19,6 +21,12 @@ int main(void) return lfails; } +static int code_fn(int pos, int len, void *arg) +{ + struct imm_eseq const *seq = arg; 
+ return imm_eseq_get(seq, pos, len, 1); +} + static void test_protein_uniform(void) { struct imm_path path = imm_path(); @@ -55,13 +63,14 @@ static void test_protein_uniform(void) eq(imm_eseq_setup(&eseq, &seq), 0); - struct viterbi task = {}; - viterbi_init(&task); - viterbi_setup(&task, &protein, &eseq); - close(viterbi_null_loglik(&task), -48.9272687711); - float score = 0; - close(viterbi_alt_path(&task, &path, &score), 0); - close(score, -55.59428153448); + struct vit *task = vit_new(); + eq(vit_setup(task, protein.core_size), 0); + eq(setup_viterbi(task, &protein), 0); + close(vit_null(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 48.9272687711); + close(vit_cost(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 55.59428153448); + eq(vit_path(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 0); + imm_path_reset(&path); + eq(trellis_unzip(vit_trellis(task), imm_eseq_size(&eseq), &path), 0); eq(imm_path_nsteps(&path), 14); @@ -100,7 +109,7 @@ static void test_protein_uniform(void) eq(i, 10); imm_eseq_cleanup(&eseq); - viterbi_cleanup(&task); + vit_del(task); protein_cleanup(&protein); imm_path_cleanup(&path); } @@ -141,13 +150,14 @@ static void test_protein_occupancy(void) eq(imm_eseq_setup(&eseq, &seq), 0); - struct viterbi task = {}; - viterbi_init(&task); - viterbi_setup(&task, &protein, &eseq); - close(viterbi_null_loglik(&task), -48.9272687711); - float score = 0; - close(viterbi_alt_path(&task, &path, &score), 0); - close(score, -54.35543421312); + struct vit *task = vit_new(); + eq(vit_setup(task, protein.core_size), 0); + eq(setup_viterbi(task, &protein), 0); + close(vit_null(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 48.9272687711); + close(vit_cost(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 54.35543421312); + eq(vit_path(task, imm_eseq_size(&eseq), code_fn, (void *)&eseq), 0); + imm_path_reset(&path); + eq(trellis_unzip(vit_trellis(task), imm_eseq_size(&eseq), &path), 0); eq(imm_path_nsteps(&path), 14); @@ -186,7 
+196,7 @@ static void test_protein_occupancy(void) eq(i, 10); imm_eseq_cleanup(&eseq); - viterbi_cleanup(&task); + vit_del(task); protein_cleanup(&protein); imm_path_cleanup(&path); } diff --git a/c-core/test_viterbi.c b/c-core/test_viterbi.c deleted file mode 100644 index 0c66b73..0000000 --- a/c-core/test_viterbi.c +++ /dev/null @@ -1,114 +0,0 @@ -#include "idot.h" -#include "ipow.h" -#include "isum.h" -#include "sample.h" -#include "vendor/minctest.h" -#include "vitfast.h" -#include "vitref.h" - -static void sample(struct vitref *ref, struct vitfast *vit, int K, int seed); -static void run(struct vitref *ref, struct vitfast *vit, char const *seq); - -int main(void) -{ - struct vitref *ref = NULL; - struct vitfast *vit = NULL; - - ok(ref = vitref_new()); - ok(vit = vitfast_new()); - - for (int K = 1; K < 10; K += 3) - { - ok(!vitref_setup(ref, K)); - ok(!vitfast_setup(vit, K)); - - for (int L = 1; L < 100; L += 11) - { - for (int seed = 0; seed < 100; ++seed) - { - vitref_sample(ref, seed); - sample(ref, vit, K, seed); - run(ref, vit, sample_sequence(L, "ACGT")); - } - } - } - - vitref_del(ref); - vitfast_del(vit); -} - -static void sample(struct vitref *ref, struct vitfast *vit, int K, int seed) -{ - vitref_sample(ref, seed); - vitfast_set_extr_trans(vit, EXTR_TRANS_SN, vitref_get_extr_trans(ref, EXTR_TRANS_SN)); - vitfast_set_extr_trans(vit, EXTR_TRANS_NN, vitref_get_extr_trans(ref, EXTR_TRANS_NN)); - vitfast_set_extr_trans(vit, EXTR_TRANS_SB, vitref_get_extr_trans(ref, EXTR_TRANS_SB)); - vitfast_set_extr_trans(vit, EXTR_TRANS_NB, vitref_get_extr_trans(ref, EXTR_TRANS_NB)); - vitfast_set_extr_trans(vit, EXTR_TRANS_EB, vitref_get_extr_trans(ref, EXTR_TRANS_EB)); - vitfast_set_extr_trans(vit, EXTR_TRANS_JB, vitref_get_extr_trans(ref, EXTR_TRANS_JB)); - vitfast_set_extr_trans(vit, EXTR_TRANS_EJ, vitref_get_extr_trans(ref, EXTR_TRANS_EJ)); - vitfast_set_extr_trans(vit, EXTR_TRANS_JJ, vitref_get_extr_trans(ref, EXTR_TRANS_JJ)); - vitfast_set_extr_trans(vit, 
EXTR_TRANS_EC, vitref_get_extr_trans(ref, EXTR_TRANS_EC)); - vitfast_set_extr_trans(vit, EXTR_TRANS_CC, vitref_get_extr_trans(ref, EXTR_TRANS_CC)); - vitfast_set_extr_trans(vit, EXTR_TRANS_ET, vitref_get_extr_trans(ref, EXTR_TRANS_ET)); - vitfast_set_extr_trans(vit, EXTR_TRANS_CT, vitref_get_extr_trans(ref, EXTR_TRANS_CT)); - - for (int i = 0; i < VITREF_TABLE_SIZE; ++i) - { - vitfast_set_null(vit, vitref_get_null(ref, i), i); - vitfast_set_background(vit, vitref_get_background(ref, i), i); - } - - for (int k = 0; k < K; ++k) - { - for (int i = 0; i < VITREF_TABLE_SIZE; ++i) - vitfast_set_match(vit, vitref_get_match(ref, k, i), k, i); - } - - for (int k = 0; k < K; ++k) - { - vitfast_set_core_trans(vit, CORE_TRANS_BM, vitref_get_core_trans(ref, CORE_TRANS_BM, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_MM, vitref_get_core_trans(ref, CORE_TRANS_MM, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_MI, vitref_get_core_trans(ref, CORE_TRANS_MI, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_MD, vitref_get_core_trans(ref, CORE_TRANS_MD, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_IM, vitref_get_core_trans(ref, CORE_TRANS_IM, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_II, vitref_get_core_trans(ref, CORE_TRANS_II, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_DM, vitref_get_core_trans(ref, CORE_TRANS_DM, k), k); - vitfast_set_core_trans(vit, CORE_TRANS_DD, vitref_get_core_trans(ref, CORE_TRANS_DD, k), k); - } -} - -static int encode(int size, char const *sequence) -{ - static int emission_limit = 5; - static int abc_size = 4; - static int ord[127]; - static int stride[10]; - static char const abc[] = "ACGT"; - - for (int i = 0; i < abc_size; ++i) - ord[(int)abc[i]] = i; - - for (int i = emission_limit - 1; i >= 0; --i) - stride[i] = ipow(abc_size, i); - - int axis[10]; - for (int i = 0; i < size; ++i) - axis[i] = ord[(int)sequence[i]]; - - return isum(size - 1, stride + 1) + idot(size, axis, stride); -} - -static int code_callb(int pos, int len, void 
*arg) -{ - char const *sequence = arg; - return encode(len, &sequence[pos]); -} - -static void run(struct vitref *ref, struct vitfast *vit, char const *seq) -{ - int L = strlen(seq); - float slow = vitref_cost(ref, L, code_callb, (void *)seq); - float fast = vitfast_cost(vit, L, code_callb, (void *)seq); - close(slow, fast); -} diff --git a/c-core/test_window.c b/c-core/test_window.c index e3b9ebf..af09bdb 100644 --- a/c-core/test_window.c +++ b/c-core/test_window.c @@ -40,7 +40,7 @@ int main(void) eq(scan_add(scan, sequences[0].id, sequences[0].name, seq), 0); eq(scan_run(scan, PRODDIR), 0); eq(scan_progress(scan), 100); - eq(chksum(PRODDIR "/products.tsv"), 5268); + eq(chksum(PRODDIR "/products.tsv"), 5999); eq(scan_close(scan), 0); scan_del(scan); diff --git a/c-core/thread.c b/c-core/thread.c index eade495..a689cd3 100644 --- a/c-core/thread.c +++ b/c-core/thread.c @@ -12,10 +12,12 @@ #include "product_thread.h" #include "protein_iter.h" #include "protein_reader.h" +#include "rc.h" #include "sequence.h" #include "sequence_queue.h" -#include "viterbi.h" -#include "viterbi_struct.h" +#include "setup_viterbi.h" +#include "trellis.h" +#include "vit.h" #include "window.h" void thread_init(struct thread *x) @@ -25,7 +27,7 @@ void thread_init(struct thread *x) x->multi_hits = false; x->hmmer3_compat = false; - viterbi_init(&x->viterbi); + x->viterbi = NULL; x->product = NULL; x->partition = -1; chararray_init(&x->amino); @@ -35,6 +37,7 @@ void thread_init(struct thread *x) int thread_setup(struct thread *x, struct thread_params params) { + if (!(x->viterbi = vit_new())) return DCP_ENOMEM; struct database_reader const *db = params.reader->db; protein_setup(&x->protein, database_reader_params(db, NULL)); int rc = 0; @@ -64,7 +67,11 @@ void thread_cleanup(struct thread *x) { x->partition = -1; protein_cleanup(&x->protein); - viterbi_cleanup(&x->viterbi); + if (x->viterbi) + { + vit_del(x->viterbi); + x->viterbi = NULL; + } chararray_cleanup(&x->amino); 
hmmer_cleanup(&x->hmmer); imm_path_cleanup(&x->path); @@ -114,6 +121,12 @@ static int hmmer_stage(struct protein *, struct product_thread *, struct sequence const *, int protein_idx, struct imm_path const *); +static int code_fn(int pos, int len, void *arg) +{ + struct imm_eseq const *seq = arg; + return imm_eseq_get(seq, pos, len, 1); +} + static int process_window(struct thread *x, int protein_idx, struct window const *w) { @@ -128,15 +141,19 @@ static int process_window(struct thread *x, int protein_idx, bool hmmer3_compat = x->hmmer3_compat; protein_reset(&x->protein, sequence_size(seq), multi_hits, hmmer3_compat); - if ((rc = viterbi_setup(&x->viterbi, &x->protein, &seq->imm.eseq))) return rc; + if ((rc = setup_viterbi(x->viterbi, &x->protein))) return rc; + + int L = sequence_size(seq); + float null = -vit_null(x->viterbi, sequence_size(seq), code_fn, (void *)&seq->imm.eseq); + float alt = -vit_cost(x->viterbi, sequence_size(seq), code_fn, (void *)&seq->imm.eseq); - float null = viterbi_null_loglik(&x->viterbi); - float alt = viterbi_alt_loglik(&x->viterbi); line->lrt = lrt(null, alt); if (!imm_lprob_is_finite(line->lrt) || line->lrt < 0) return rc; if ((rc = product_line_set_protein(line, x->protein.accession))) return rc; - if ((rc = viterbi_alt_path(&x->viterbi, &x->path, NULL))) return rc; + if ((rc = vit_path(x->viterbi, L, code_fn, (void *)&seq->imm.eseq))) return rc; + imm_path_reset(&x->path); + if ((rc = trellis_unzip(vit_trellis(x->viterbi), L, &x->path))) return rc; if (hmmer_online(&x->hmmer)) { diff --git a/c-core/thread.h b/c-core/thread.h index 21ebdba..b569253 100644 --- a/c-core/thread.h +++ b/c-core/thread.h @@ -7,8 +7,6 @@ #include "protein.h" #include "protein_iter.h" #include "thread_params.h" -#include "viterbi.h" -#include "viterbi_struct.h" struct chararray; struct product_thread; @@ -22,7 +20,7 @@ struct thread bool multi_hits; bool hmmer3_compat; - struct viterbi viterbi; + struct vit *viterbi; struct product_thread *product; int 
partition; struct chararray amino; diff --git a/c-core/trellis.c b/c-core/trellis.c index 5feb4dc..c4afd99 100644 --- a/c-core/trellis.c +++ b/c-core/trellis.c @@ -1,4 +1,5 @@ #include "trellis.h" +#include "imm/path.h" #include "error.h" #include "rc.h" #include "xrealloc.h" @@ -145,3 +146,26 @@ CONST unsigned node_get_field(uint16_t x, int state) return 0; } // clang-format on + +int trellis_unzip(struct trellis *x, int L, struct imm_path *path) +{ + int state = state_make_end(); + assert(seq_size <= INT_MAX); + int stage = L; + trellis_seek_xnode(x, stage); + + while (!state_is_start(state) || stage) + { + int size = trellis_emission_size(x, state); + if (imm_path_add(path, imm_step(state, size, 0))) return error(DCP_ENOMEM); + state = trellis_previous_state(x, state); + stage -= size; + if (state_is_core(state)) + trellis_seek_node(x, stage, state_core_idx(state)); + else + trellis_seek_xnode(x, stage); + } + if (imm_path_add(path, imm_step(state, 0, 0))) return error(DCP_ENOMEM); + imm_path_reverse(path); + return 0; +} diff --git a/c-core/trellis.h b/c-core/trellis.h index b55cce4..7812e5e 100644 --- a/c-core/trellis.h +++ b/c-core/trellis.h @@ -6,6 +6,8 @@ #include "state.h" #include "trellis_bits.h" +struct imm_path; + struct trellis { int core_size; @@ -29,11 +31,27 @@ void trellis_seek_xnode(struct trellis *x, int stage); void trellis_seek_node(struct trellis *x, int stage, int core_idx); INLINE void trellis_clear_xnode(struct trellis *x) { *x->xnode = 0; } INLINE void trellis_clear_node(struct trellis *x) { *x->node = 0; } +int trellis_unzip(struct trellis *, int seq_size, struct imm_path *); // clang-format on // clang-format off INLINE void trellis_set(struct trellis *x, int id, int value) { + /* long sz = x->node - x->nodes; */ + /* long stage = sz / x->core_size; */ + /* long k = sz - stage * x->core_size; */ + /* if (id == STATE_S) printf("S: %d\n", value); */ + /* else if (id == STATE_N) printf("N: %d\n", value); */ + /* else if (id == STATE_B) 
printf("B: %d\n", value); */ + /* else if (id == STATE_E) printf("E: %d\n", value); */ + /* else if (id == STATE_C) printf("C: %d\n", value); */ + /* else if (id == STATE_T) printf("T: %d\n", value); */ + /* else if (id == STATE_J) printf("J: %d\n", value); */ + /* else if (state_is_match(id)) printf("M%ld: %d\n", k, value); */ + /* else if (state_is_delete(id)) printf("D%ld: %d\n", k, value); */ + /* else if (state_is_insert(id)) printf("I%ld: %d\n", k, value); */ + /* else UNREACHABLE(); */ + unsigned v = *(unsigned *)&value; if (id == STATE_S) *x->xnode |= v << (0); else if (id == STATE_N) *x->xnode |= v << (0 + SBITS); diff --git a/c-core/vit.c b/c-core/vit.c new file mode 100644 index 0000000..24f2c51 --- /dev/null +++ b/c-core/vit.c @@ -0,0 +1,942 @@ +#include "vit.h" +#include "trellis.h" +#include +#include +#include +#include + +#if __ARM_NEON + #include + typedef float32x4_t packf; + typedef uint32x4_t packu; + #define ALIGNMENT 16 + #define NUM_LANES 4 +#elif __AVX__ + #include + typedef __m256 packf; + typedef __m256i packu; + #define ALIGNMENT 32 + #define NUM_LANES 8 +#else + #error "We require either AVX or NEON feature." 
+#endif + +#define INLINE static inline __attribute__((always_inline)) + +typedef uint32_t u32; +typedef float f32; + +#define STEP_NAME_OFFSET 28 +#define STEP_LANE_OFFSET 24 +#define STEP_DATA_OFFSET 0 + +#define STEP_NAME_MASK 0xF0000000 +#define STEP_LANE_MASK 0x0F000000 +#define STEP_DATA_MASK 0x00FFFFFF + +#define STEP_SN (0x1 << STEP_NAME_OFFSET) +#define STEP_NN (0x2 << STEP_NAME_OFFSET) + +#define STEP_SB (0x1 << STEP_NAME_OFFSET) +#define STEP_NB (0x2 << STEP_NAME_OFFSET) +#define STEP_EB (0x4 << STEP_NAME_OFFSET) +#define STEP_JB (0x8 << STEP_NAME_OFFSET) + +#define STEP_EJ (0x1 << STEP_NAME_OFFSET) +#define STEP_JJ (0x2 << STEP_NAME_OFFSET) + +#define STEP_EC (0x1 << STEP_NAME_OFFSET) +#define STEP_CC (0x2 << STEP_NAME_OFFSET) + +#define STEP_ET (0x1 << STEP_NAME_OFFSET) +#define STEP_CT (0x2 << STEP_NAME_OFFSET) + +#define STEP_BM (0x1 << STEP_NAME_OFFSET) +#define STEP_MM (0x2 << STEP_NAME_OFFSET) +#define STEP_IM (0x4 << STEP_NAME_OFFSET) +#define STEP_DM (0x8 << STEP_NAME_OFFSET) + +#define STEP_MI (0x1 << STEP_NAME_OFFSET) +#define STEP_II (0x2 << STEP_NAME_OFFSET) + +#define STEP_MD (0x1 << STEP_NAME_OFFSET) +#define STEP_DD (0x2 << STEP_NAME_OFFSET) + +#define STEP_ME (0x1 << STEP_NAME_OFFSET) +#define STEP_DE (0x2 << STEP_NAME_OFFSET) + +INLINE u32 step(u32 n, int x) +{ + return (n & STEP_NAME_MASK) | ((STEP_LANE_MASK | STEP_DATA_MASK) & (u32)x); +} + +INLINE int step_lane(u32 x) +{ + return (int)(x & STEP_LANE_MASK) >> STEP_LANE_OFFSET; +} + +INLINE int step_data(u32 x) +{ + return (int)(STEP_DATA_MASK & x) >> STEP_DATA_OFFSET; +} + +struct emission +{ + f32 null[VITERBI_TABLE_SIZE]; + packf background[VITERBI_TABLE_SIZE]; + packf *match; +}; + +struct extr_trans +{ + f32 RR; + + f32 SN; + f32 NN; + + f32 SB; + f32 NB; + f32 EB; + f32 JB; + + f32 EJ; + f32 JJ; + + f32 EC; + f32 CC; + + f32 ET; + f32 CT; +}; + +struct core_trans +{ + packf BM; + packf MM; + packf MI; + packf MD; + packf IM; + packf II; + packf DM; + packf DD; +}; + +struct 
extr_state +{ + // Prefix + f32 S; + f32 N; + f32 B; + + // Infix + f32 J; + + // Suffix + f32 E; + f32 C; + f32 T; +}; + +struct core_state +{ + packf M; + packf D; + packf I; +}; + +struct prev_extr_state +{ + u32 S; + u32 N; + u32 B; + + u32 J; + + u32 C; + u32 E; + u32 T; +}; + +struct prev_core_state +{ + packu M; + packu D; + packu I; +}; + +struct vit +{ + int K; + int Q; + int maxQ; + struct extr_state extr_state[VITERBI_TIME_FRAME]; + struct core_state *core_state; + + struct emission emission; + + struct extr_trans extr_trans; + struct core_trans *core_trans; + + struct prev_extr_state prev_extr_state; + struct prev_core_state *prev_core_state; + struct trellis trellis; +}; + +#if __ARM_NEON +#define add(a, b) vaddq_f32(a, b) +#define and(a, b) vandq_u32(a, b) +#define blendf(m, a, b) vbslq_f32(m, a, b) +#define blendu(m, a, b) vbslq_u32(m, a, b) +#define castf(x) (packf)(x) +#define castu(x) (packu)(x) +#define dupf(x) vdupq_n_f32(x) +#define dupu(x) vdupq_n_u32(x) +#define eq(a, b) vceqq_f32(a, b) +#define initu(a, b, c, d) ((packu){a, b, c, d}) +#define initf(a, b, c, d) ((packf){a, b, c, d}) +#define loadf(mem) vld1q_f32(mem) +#define loadu(mem) vld1q_u32(mem) +#define maxu(a, b) vmaxq_u32(a, b) +#define min(a, b) vminq_f32(a, b) +#define or(a, b) vorrq_u32(a, b) +#define storef(mem, x) vst1q_f32(mem, x) +#define storeu(mem, x) vst1q_u32(mem, x) +#endif + +#if __AVX__ +#define add(a, b) _mm256_add_ps(a, b) +#define and(a, b) _mm256_and_si256(a, b) +#define blendf(m, a, b) _mm256_blendv_ps(b, a, m) +#define blendu(m, a, b) _mm256_blendv_epi8(b, a, m) +#define castf(x) _mm256_castsi256_ps(x) +#define castu(x) _mm256_castps_si256(x) +#define dupf(x) _mm256_set1_ps(x) +#define dupu(x) _mm256_set1_epi32(x) +#define eq(a, b) castu(_mm256_cmp_ps(a, b, _CMP_EQ_OQ)) +#define initu(a, b, c, d, e, f, g, h) _mm256_setr_epi32(a, b, c, d, e, f, g, h) +#define initf(a, b, c, d, e, f, g, h) _mm256_setr_ps(a, b, c, d, e, f, g, h) +#define loadf(mem) 
_mm256_loadu_ps(mem) +#define loadu(mem) _mm256_loadu_si256(mem) +#define maxu(a, b) _mm256_max_epu32(a, b) /* unsigned lane-wise max, matching vmaxq_u32 on NEON */ +#define min(a, b) _mm256_min_ps(a, b) +#define or(a, b) _mm256_or_si256(a, b) +#define storef(mem, x) _mm256_storeu_ps(mem, x) +#define storeu(mem, x) _mm256_storeu_si256((packu *)(mem), x) +#endif + +INLINE packf shift(packf x) /* moves every lane one slot up and puts INFINITY in lane 0 */ +{ +#if __ARM_NEON + return vextq_f32(dupf(INFINITY), x, NUM_LANES - 1); +#endif +#if __AVX__ + x = _mm256_permutevar8x32_ps(x, initu(7, 0, 1, 2, 3, 4, 5, 6)); + return _mm256_blend_ps(x, dupf(INFINITY), _MM_SHUFFLE(0, 0, 0, 1)); +#endif +} + +INLINE u32 hmaxu(packu x) /* largest lane of x, lanes compared as unsigned */ +{ +#if __ARM_NEON + return vmaxvq_u32(x); +#endif +#if __AVX__ + x = maxu(x, castu(_mm256_permute_ps(castf(x), _MM_SHUFFLE(2, 3, 0, 1)))); + x = maxu(x, castu(_mm256_permute_ps(castf(x), _MM_SHUFFLE(1, 0, 3, 2)))); + x = maxu(x, _mm256_permute2f128_si256(x, x, _MM_SHUFFLE(0, 0, 0, 1))); + return _mm_cvtsi128_si32(_mm256_castsi256_si128(x)); +#endif +} + +INLINE f32 hmin(packf x) /* smallest lane of x */ +{ +#if __ARM_NEON + return vminvq_f32(x); +#endif +#if __AVX__ + x = min(x, _mm256_permute_ps(x, _MM_SHUFFLE(2, 3, 0, 1))); + x = min(x, _mm256_permute_ps(x, _MM_SHUFFLE(1, 0, 3, 2))); + x = min(x, _mm256_permute2f128_ps(x, x, _MM_SHUFFLE(0, 0, 0, 1))); + return _mm_cvtss_f32(_mm256_castps256_ps128(x)); +#endif +} + +INLINE int all_leq(packf a, packf b) /* nonzero iff a <= b holds in every lane */ +{ +#if __ARM_NEON + packu m = vmvnq_u32(eq(min(a, b), a)); + uint32x2_t r = vshrn_n_u64(vreinterpretq_u64_u32(m), 16); + return !vget_lane_u64(vreinterpret_u64_u32(r), 0); +#endif +#if __AVX__ + return 0xFF == _mm256_movemask_ps(_mm256_cmp_ps(a, b, _CMP_LE_OS)); +#endif +} + +INLINE void setf(packf *x, f32 v, int e) /* writes v into lane (e mod NUM_LANES) of *x, other lanes untouched */ +{ + packf broad = dupf(v); + int32_t m[2 * NUM_LANES] = {0}; + m[NUM_LANES] = -1; + packu mask = loadu((void const *)(m + NUM_LANES - (e & (NUM_LANES - 1)))); + *x = blendf(castf(mask), broad, *x); +} + +__attribute__((unused)) INLINE void setu(packu *x, u32 v, int e) +{ + packu broad = dupu(v); + int32_t m[2 * NUM_LANES] = {0}; +
m[NUM_LANES] = -1; + packu mask = loadu((void const *)(m + NUM_LANES - (e & (NUM_LANES - 1)))); + *x = blendu(mask, broad, *x); +} + +INLINE f32 getf(packf x, int e) +{ + f32 arr[NUM_LANES]; + storef(arr, x); + return arr[e & (NUM_LANES - 1)]; +} + +INLINE u32 getu(packu x, int e) +{ + u32 arr[NUM_LANES]; + storeu(arr, x); + return arr[e & (NUM_LANES - 1)]; +} + +__attribute__((unused)) static void echof(packf x) +{ + for (int i = 0; i < NUM_LANES; ++i) + printf("%6.4f ", getf(x, i)); + printf("\n"); +} + +__attribute__((unused)) static void echou(packu x) +{ + for (int i = 0; i < NUM_LANES; ++i) + printf("%#010x ", getu(x, i)); + printf("\n"); +} + +INLINE packu pack_index(void) +{ +#if __ARM_NEON + return initu(0x0 << STEP_LANE_OFFSET, 0x1 << STEP_LANE_OFFSET, + 0x2 << STEP_LANE_OFFSET, 0x3 << STEP_LANE_OFFSET); +#endif +#if __AVX__ + return initu(0x0 << STEP_LANE_OFFSET, 0x1 << STEP_LANE_OFFSET, + 0x2 << STEP_LANE_OFFSET, 0x3 << STEP_LANE_OFFSET, + 0x4 << STEP_LANE_OFFSET, 0x5 << STEP_LANE_OFFSET, + 0x6 << STEP_LANE_OFFSET, 0x7 << STEP_LANE_OFFSET); +#endif +} + +static inline int num_packs(int K) +{ + int r = (K - 1) / NUM_LANES + 1; + return r < 2 ? 2 : r; +} + +INLINE void acc(packf *acc_val, packf val, packu *acc_idx, packu idx, int save) +{ + packf x = min(*acc_val, val); + // if (save) *acc_idx = blendu(eq(x, val), idx, *acc_idx); + if (save) *acc_idx = blendu(eq(x, *acc_val), *acc_idx, idx); + *acc_val = x; +} + +INLINE void facc(f32 *acc_val, f32 val, u32 *acc_idx, u32 idx, int save) +{ + f32 x = fminf(*acc_val, val); + if (save) *acc_idx = x == *acc_val ? *acc_idx : idx; + *acc_val = x; +} + +INLINE void hacc(f32 *hval, packf val, u32 *hidx, packu idx, int save) +{ + f32 x = hmin(val); + if (save) *hidx = hmaxu(and(eq(val, dupf(x)), idx)); + *hval = x; +} + +INLINE int core_pack(int k, int Q) { return k % Q; } +INLINE int core_lane(int k, int Q) { return k / Q; } +INLINE int imin(int a, int b) { return a < b ? 
a : b; } + +INLINE packf sum(packf a, packf b, packf c) { return add(add(a, b), c); } +INLINE int timemap(int q, int t, int Q) { return t * Q + q; } + +static inline void core_state_init(struct core_state *x, int t, int q, + int Q) +{ + x[timemap(q, t, Q)].M = dupf(INFINITY); + x[timemap(q, t, Q)].D = dupf(INFINITY); + x[timemap(q, t, Q)].I = dupf(INFINITY); +} + +static inline void core_trans_init(struct core_trans *x) +{ + x->BM = dupf(INFINITY); + x->MM = dupf(INFINITY); + x->MI = dupf(INFINITY); + x->MD = dupf(INFINITY); + x->IM = dupf(INFINITY); + x->II = dupf(INFINITY); + x->DM = dupf(INFINITY); + x->DD = dupf(INFINITY); +} + +static inline void emission_init(struct emission *x, int Q) +{ + for (int i = 0; i < VITERBI_TABLE_SIZE; ++i) + { + x->null[i] = INFINITY; + x->background[i] = dupf(INFINITY); + } + + for (int i = 0; i < VITERBI_TABLE_SIZE * Q; ++i) + x->match[i] = dupf(INFINITY); +} + +static inline void extr_state_init(struct extr_state *x, int t) +{ + x[t].S = INFINITY; + x[t].N = INFINITY; + x[t].B = INFINITY; + + x[t].J = INFINITY; + + x[t].E = INFINITY; + x[t].C = INFINITY; + x[t].T = INFINITY; +} + +static inline void extr_trans_init(struct extr_trans *x) +{ + x->SN = INFINITY; + x->NN = INFINITY; + x->SB = INFINITY; + x->NB = INFINITY; + x->EB = INFINITY; + x->JB = INFINITY; + x->EJ = INFINITY; + x->JJ = INFINITY; + x->EC = INFINITY; + x->CC = INFINITY; + x->ET = INFINITY; + x->CT = INFINITY; +} + +static inline void prev_core_state_init(struct prev_core_state *x) +{ + x->M = dupu(step(STEP_BM, 1)); + x->D = dupu(step(STEP_MD, 0)); + x->I = dupu(step(STEP_MI, 1)); +} + +static inline void prev_extr_state_init(struct prev_extr_state *x) +{ + x->S = 0U; + x->N = step(STEP_SN, 1); + x->B = step(STEP_SB, 0); + + x->J = step(STEP_EJ, 1); + + x->E = step(STEP_ME, 0); + x->C = step(STEP_EC, 1); + x->T = step(STEP_ET, 0); +} + +struct vit *vit_new(void) +{ + struct vit *x = aligned_alloc(ALIGNMENT, sizeof(struct vit)); + if (!x) return x; + + x->Q = 
x->maxQ = 0; + x->emission.match = NULL; + x->core_state = NULL; + x->core_trans = NULL; + x->prev_core_state = NULL; + trellis_init(&x->trellis); + + return x; +} + +/* Frees `x` and every buffer it owns; a NULL `x` is ignored. */ +void vit_del(struct vit const *x) +{ + if (x) + { + free(x->emission.match); + free(x->core_state); + free(x->core_trans); + free(x->prev_core_state); + trellis_cleanup((struct trellis *)&x->trellis); + free((void *)x); + } +} + +/* Prepares `x` for a profile with K core nodes: reallocates the per-pack buffers when more packs are needed than before, then resets states, transitions (except RR) and emissions to INFINITY. Returns 0 on success, 1 if an allocation fails. */ +int vit_setup(struct vit *x, int K) +{ + x->K = K; + int Q = x->Q = num_packs(K); + + for (int t = 0; t < VITERBI_TIME_FRAME; ++t) + extr_state_init(x->extr_state, t); + + if (Q > x->maxQ) + { + x->maxQ = 0; /* forget the old capacity first: if an allocation below fails, the next call must reallocate everything instead of reusing freed or NULL buffers */ + free(x->core_state); + x->core_state = aligned_alloc( + ALIGNMENT, sizeof(struct core_state[VITERBI_TIME_FRAME][Q])); + if (!x->core_state) return 1; + + free(x->emission.match); + x->emission.match = + aligned_alloc(ALIGNMENT, sizeof(packf[VITERBI_TABLE_SIZE][Q])); + if (!x->emission.match) return 1; + + free(x->core_trans); + x->core_trans = aligned_alloc(ALIGNMENT, sizeof(struct core_trans[Q])); + if (!x->core_trans) return 1; + + free(x->prev_core_state); + x->prev_core_state = + aligned_alloc(ALIGNMENT, sizeof(struct prev_core_state[Q])); + if (!x->prev_core_state) return 1; + + x->maxQ = Q; + } + + extr_trans_init(&x->extr_trans); + for (int q = 0; q < Q; ++q) + core_trans_init(&x->core_trans[q]); + + for (int q = 0; q < Q; ++q) + { + for (int t = 0; t < VITERBI_TIME_FRAME; ++t) + core_state_init(x->core_state, t, q, Q); + } + + emission_init(&x->emission, Q); + + return 0; +} + +void vit_set_extr_trans(struct vit *x, enum extr_trans_id id, + f32 scalar) +{ + switch (id) + { + case EXTR_TRANS_RR: x->extr_trans.RR = scalar; break; + case EXTR_TRANS_SN: x->extr_trans.SN = scalar; break; + case EXTR_TRANS_NN: x->extr_trans.NN = scalar; break; + case EXTR_TRANS_SB: x->extr_trans.SB = scalar; break; + case EXTR_TRANS_NB: x->extr_trans.NB = scalar; break; + case EXTR_TRANS_EB: x->extr_trans.EB = scalar; break; + case EXTR_TRANS_JB: x->extr_trans.JB = scalar; break; + case
EXTR_TRANS_EJ: x->extr_trans.EJ = scalar; break;
+  case EXTR_TRANS_JJ: x->extr_trans.JJ = scalar; break;
+  case EXTR_TRANS_EC: x->extr_trans.EC = scalar; break;
+  case EXTR_TRANS_CC: x->extr_trans.CC = scalar; break;
+  case EXTR_TRANS_ET: x->extr_trans.ET = scalar; break;
+  case EXTR_TRANS_CT: x->extr_trans.CT = scalar; break;
+  default:
+    __builtin_unreachable();
+    break;
+  }
+}
+
+// Store one core transition cost for node k.
+// core_pack()/core_lane() turn k into a pack index q and a lane e; the scalar
+// and the lane are handed to setf() for the transition vector selected by id.
+// Any id outside the eight CORE_TRANS_* cases is declared unreachable.
+void vit_set_core_trans(struct vit *x, enum core_trans_id id,
+                        f32 scalar, int k)
+{
+  int q = core_pack(k, x->Q);
+  int e = core_lane(k, x->Q);
+
+  switch (id)
+  {
+  case CORE_TRANS_BM: setf(&x->core_trans[q].BM, scalar, e); break;
+  case CORE_TRANS_MM: setf(&x->core_trans[q].MM, scalar, e); break;
+  case CORE_TRANS_MI: setf(&x->core_trans[q].MI, scalar, e); break;
+  case CORE_TRANS_MD: setf(&x->core_trans[q].MD, scalar, e); break;
+  case CORE_TRANS_IM: setf(&x->core_trans[q].IM, scalar, e); break;
+  case CORE_TRANS_II: setf(&x->core_trans[q].II, scalar, e); break;
+  case CORE_TRANS_DM: setf(&x->core_trans[q].DM, scalar, e); break;
+  case CORE_TRANS_DD: setf(&x->core_trans[q].DD, scalar, e); break;
+  default:
+    __builtin_unreachable();
+    break;
+  }
+}
+
+// Store the scalar emission cost of the null table entry `code`.
+// No range check is done on `code` here.
+void vit_set_null(struct vit *x, f32 scalar, int code)
+{
+  x->emission.null[code] = scalar;
+}
+
+// Store the background emission cost of entry `code`; the scalar goes through
+// dupf() first (presumably a broadcast to every lane -- confirm).
+void vit_set_background(struct vit *x, f32 scalar, int code)
+{
+  x->emission.background[code] = dupf(scalar);
+}
+
+// Store the match emission cost of node k for entry `code`.
+// The match table is indexed as code * Q + pack, and setf() receives the lane.
+void vit_set_match(struct vit *x, f32 scalar, int k, int code)
+{
+  int q = core_pack(k, x->Q);
+  int e = core_lane(k, x->Q);
+  setf(&x->emission.match[code * x->Q + q], scalar, e);
+}
+
+static void before(struct trellis *, int K);
+static void after(struct trellis *, int Q, int K, struct prev_extr_state *,
+                  struct prev_core_state *);
+static void dump_trellis(struct vit const *, int l);
+
+// Run the dynamic program over a sequence of length L and return xs[0].T,
+// the value left in the T state after the last position.
+//
+// Values are costs: candidates are built by addition and passed to
+// facc()/acc()/hacc() (presumably keep-the-minimum accumulators, given that
+// E starts at INFINITY -- confirm against their definitions).
+//
+// path     non-zero: also record the chosen predecessor of every state into
+//          x->trellis via before()/after(). The trellis must already be set
+//          up for (K, L); vit_path() does that before calling.
+// code_fn  called as code_fn(l - t, t, code_arg) to get the emission code of
+//          the t-long window that ends at position l.
+INLINE f32 cost(struct vit *x, int L, int path, viterbi_code_fn code_fn,
+                void *code_arg)
+{
+  struct emission const em = x->emission;
+
+  struct extr_trans const xt = x->extr_trans;
+  struct core_trans const *ct = x->core_trans;
+
+  struct extr_state *xs = x->extr_state;
+  struct core_state *cs = x->core_state;
+
+  struct prev_extr_state *px = &x->prev_extr_state;
+  struct prev_core_state *pc = x->prev_core_state;
+
+// Core-state accessors for pack q at time slot t (expanded at use, so Q
+// declared just below is visible to them).
+#define xM(q, t) cs[timemap(q, t, Q)].M
+#define xD(q, t) cs[timemap(q, t, Q)].D
+#define xI(q, t) cs[timemap(q, t, Q)].I
+
+  int Q = x->Q;
+
+  // For l = 0
+  xs[0].S = 0;
+  xs[0].B = xt.SB;
+  if (path) before(&x->trellis, x->K);
+  for (int l = 1; l <= L; ++l)
+  {
+    // Reset the slot that enters the window and the predecessor records.
+    extr_state_init(xs, imin(VITERBI_TIME_FRAME - 1, l));
+    prev_extr_state_init(px);
+    for (int q = 0; q < Q; ++q)
+    {
+      core_state_init(cs, imin(VITERBI_TIME_FRAME - 1, l), q, Q);
+      prev_core_state_init(&pc[q]);
+    }
+
+    // t is the emission length; longest window first, down to 1.
+    for (int t = imin(VITERBI_TIME_FRAME - 1, l); t > 0; --t)
+    {
+      int code = code_fn(l - t, t, code_arg);
+      f32 nil = em.null[code];
+      // a and z are the two slots touched in this step; they are swapped at
+      // the bottom of the loop body.
+      int a = t - 0;
+      int z = t - 1;
+
+      facc(&xs[a].N, xs[z].S + xt.SN + nil, &px->N, step(STEP_SN, t), path);
+      facc(&xs[a].N, xs[z].N + xt.NN + nil, &px->N, step(STEP_NN, t), path);
+
+      facc(&xs[a].B, xs[a].S + xt.SB, &px->B, step(STEP_SB, 0), path);
+      facc(&xs[a].B, xs[a].N + xt.NB, &px->B, step(STEP_NB, 0), path);
+
+      facc(&xs[a].J, xs[z].E + xt.EJ + nil, &px->J, step(STEP_EJ, t), path);
+      facc(&xs[a].J, xs[z].J + xt.JJ + nil, &px->J, step(STEP_JJ, t), path);
+
+      facc(&xs[a].C, xs[z].E + xt.EC + nil, &px->C, step(STEP_EC, t), path);
+      facc(&xs[a].C, xs[z].C + xt.CC + nil, &px->C, step(STEP_CC, t), path);
+
+      // Seed the "previous pack" values from the last pack, passed through
+      // shift() (presumably moves lanes by one so pack Q-1 feeds pack 0 --
+      // confirm).
+      packf lastMz = shift(xM(Q - 1, z));
+      packf lastDz = shift(xD(Q - 1, z));
+      packf lastIz = shift(xI(Q - 1, z));
+      packf lastMa = shift(xM(Q - 1, a));
+      packf currBz = dupf(xs[z].B);
+      packf E = dupf(INFINITY);
+      packu prevE = dupu(0U);
+      packf bg = em.background[code];
+      for (int q = 0; q < Q; ++q)
+      {
+        packf ma = em.match[code * Q + q];
+        packf Ma = xM(q, a);
+        packf Da = xD(q, a);
+        packf Ia = xI(q, a);
+        packf BM = ct[q].BM;
+        packf MM = ct[q].MM;
+        packf IM = ct[q].IM;
+        packf DM = ct[q].DM;
+        packf MI = ct[q].MI;
+        packf II = ct[q].II;
+        packf MD = ct[q].MD;
+
+        acc(&Ma, sum(currBz, BM, ma), &pc[q].M, dupu(step(STEP_BM, t)), path);
+        acc(&Ma, sum(lastMz, MM, ma), &pc[q].M, dupu(step(STEP_MM, t)), path);
+        acc(&Ma, sum(lastIz, IM, ma), &pc[q].M, dupu(step(STEP_IM, t)), path);
+        acc(&Ma, sum(lastDz, DM, ma), &pc[q].M, dupu(step(STEP_DM, t)), path);
+
+        // From here on the last* values refer to this pack, not the previous
+        // one; the I updates below depend on that ordering.
+        lastMz = xM(q, z);
+        lastDz = xD(q, z);
+        lastIz = xI(q, z);
+
+        acc(&Ia, sum(lastIz, II, bg), &pc[q].I, dupu(step(STEP_II, t)), path);
+        acc(&Ia, sum(lastMz, MI, bg), &pc[q].I, dupu(step(STEP_MI, t)), path);
+
+        acc(&Da, add(lastMa, MD), &pc[q].D, dupu(step(STEP_MD, 0)), path);
+
+        acc(&E, Ma, &prevE, dupu(step(STEP_ME, q)), path);
+        acc(&E, Da, &prevE, dupu(step(STEP_DE, q)), path);
+
+        lastMa = Ma;
+
+        // Exchange the a and z slots of this pack, storing the new values
+        // at z.
+        xM(q, a) = lastMz;
+        xD(q, a) = lastDz;
+        xI(q, a) = lastIz;
+        xM(q, z) = Ma;
+        xD(q, z) = Da;
+        xI(q, z) = Ia;
+      }
+      {
+        // M->D from the last pack back into pack 0, then reduce E across
+        // lanes into the scalar E state via hacc().
+        lastMa = shift(lastMa);
+        packf MD = ct[0].MD;
+        acc(&xD(0, z), add(lastMa, MD), &pc[0].D, dupu(step(STEP_MD, 0)), path);
+        acc(&E, xD(0, z), &prevE, dupu(step(STEP_DE, 0)), path);
+        prevE = or(prevE, pack_index());
+        hacc(&xs[a].E, E, &px->E, prevE, path);
+      }
+
+      // First D->D pass over all packs.
+      packf lastD0 = shift(xD(Q - 1, z));
+      for (int q = 0; q < Q; ++q)
+      {
+        packf DD = ct[q].DD;
+        acc(&xD(q, z), add(lastD0, DD), &pc[q].D, dupu(step(STEP_DD, 0)), path);
+        lastD0 = xD(q, z);
+      }
+
+      // Further D->D passes: keep going round the packs until one is found
+      // where all_leq() reports that the candidate improves nothing.
+      int q = 0;
+      do
+      {
+        lastD0 = shift(lastD0);
+        for (q = 0; q < Q; ++q)
+        {
+          // Named `cand`, not `x`, so the `struct vit *x` parameter is not
+          // shadowed.
+          packf cand = add(lastD0, ct[q].DD);
+          if (all_leq(xD(q, z), cand)) break;
+          acc(&xD(q, z), cand, &pc[q].D, dupu(step(STEP_DD, 0)), path);
+          lastD0 = xD(q, z);
+        }
+      } while (q == Q);
+
+      facc(&xs[a].B, xs[a].E + xt.EB, &px->B, step(STEP_EB, 0), path);
+      facc(&xs[a].B, xs[a].J + xt.JB, &px->B, step(STEP_JB, 0), path);
+
+      facc(&xs[a].T, xs[a].E + xt.ET, &px->T, step(STEP_ET, 0), path);
+      facc(&xs[a].T, xs[a].C + xt.CT, &px->T, step(STEP_CT, 0), path);
+
+      // Exchange the two special-state slots, mirroring the core-state
+      // exchange above.
+      struct extr_state tmp = xs[z];
+      xs[z] = xs[a];
+      xs[a] = tmp;
+    }
+    if (path) after(&x->trellis, Q, x->K, px, pc);
+  }
+
+#undef xM
+#undef xD
+#undef xI
+
+  return xs[0].T;
+}
+
+// Fill trellis row 0 before the main loop runs.
+// Every state of the row is set to 0: N and B, M for each of the K nodes,
+// I for all nodes but the last, D for all nodes but the first, then E, J, C
+// and T. The cursors are left on row 1, node 0.
+static void before(struct trellis *tr, int K)
+{
+  trellis_seek_xnode(tr, 0);
+  trellis_seek_node(tr, 0, 0);
+
+  trellis_clear_xnode(tr);
+  trellis_set(tr, STATE_N, 0);
+  trellis_set(tr, STATE_B, 0); // SB
+  trellis_clear_node(tr);
+  trellis_set(tr, STATE_M, 0);
+  for (int k = 0; k + 1 < K; ++k)
+  {
+    trellis_set(tr, STATE_I, 0);
+    trellis_next_node(tr);
+    trellis_clear_node(tr);
+    trellis_set(tr, STATE_M, 0);
+    trellis_set(tr, STATE_D, 0);
+  }
+  trellis_next_node(tr);
+  trellis_set(tr, STATE_E, 0);
+  trellis_set(tr, STATE_J, 0);
+  trellis_set(tr, STATE_C, 0);
+  trellis_set(tr, STATE_T, 0);
+  trellis_next_xnode(tr);
+
+  trellis_seek_xnode(tr, 1);
+  trellis_seek_node(tr, 1, 0);
+}
+
+// Write one trellis row from the predecessor records gathered by cost().
+// px holds the records of the special states, pc those of the core states
+// (one pack per entry, read lane-wise with getu()). The cursor is advanced to
+// the next row on return.
+//
+// tr is never NULL: the only caller passes &x->trellis, and tr is used
+// unguarded from the first statement on.
+//
+// NOTE(review): the 0/5/10/15 offsets select the source state and look like a
+// stride of 5 (presumably VITERBI_TIME_FRAME - 1) -- confirm and name it.
+static void after(struct trellis *tr, int Q, int K, struct prev_extr_state *px,
+                  struct prev_core_state *pc)
+{
+  trellis_clear_xnode(tr);
+  if (STEP_SN & px->N) trellis_set(tr, STATE_N, 0 + step_data(px->N) - 1);
+  if (STEP_NN & px->N) trellis_set(tr, STATE_N, 5 + step_data(px->N) - 1);
+
+  if (STEP_SB & px->B) trellis_set(tr, STATE_B, step_data(px->B) + 0);
+  if (STEP_NB & px->B) trellis_set(tr, STATE_B, step_data(px->B) + 1);
+  if (STEP_EB & px->B) trellis_set(tr, STATE_B, step_data(px->B) + 2);
+  if (STEP_JB & px->B) trellis_set(tr, STATE_B, step_data(px->B) + 3);
+
+  trellis_clear_node(tr);
+  {
+    // Node 0: only a B->M record is written for it.
+    int q = core_pack(0, Q);
+    int e = core_lane(0, Q);
+    u32 M = getu(pc[q].M, e);
+    if (STEP_BM & M) trellis_set(tr, STATE_M, 0 + step_data(M) - 1);
+  }
+
+  for (int k = 0; k + 1 < K; ++k)
+  {
+    // I belongs to node k; M and D belong to node n = k + 1.
+    int q = core_pack(k, Q);
+    int e = core_lane(k, Q);
+    u32 I = getu(pc[q].I, e);
+    int n = k + 1;
+    int qn = core_pack(n, Q);
+    int en = core_lane(n, Q);
+    u32 M = getu(pc[qn].M, en);
+    u32 D = getu(pc[qn].D, en);
+    if (STEP_MI & I) trellis_set(tr, STATE_I, 0 + step_data(I) - 1);
+    if (STEP_II & I) trellis_set(tr, STATE_I, 5 + step_data(I) - 1);
+    trellis_next_node(tr);
+    trellis_clear_node(tr);
+
+    if (STEP_BM & M) trellis_set(tr, STATE_M, 0 + step_data(M) - 1);
+    if (STEP_MM & M) trellis_set(tr, STATE_M, 5 + step_data(M) - 1);
+    if (STEP_IM & M) trellis_set(tr, STATE_M, 10 + step_data(M) - 1);
+    if (STEP_DM & M) trellis_set(tr, STATE_M, 15 + step_data(M) - 1);
+
+    if (STEP_MD & D) trellis_set(tr, STATE_D, 0);
+    if (STEP_DD & D) trellis_set(tr, STATE_D, 1);
+  }
+  trellis_next_node(tr);
+
+  // The E record carries a pack index and a lane; k = lane * Q + pack is the
+  // node, stored as 2k for an M source and 2k + 1 for a D source.
+  int q = step_data(px->E);
+  int e = step_lane(px->E);
+  int k = e * Q + q;
+  if (STEP_ME & px->E) trellis_set(tr, STATE_E, 2 * k + 0);
+  if (STEP_DE & px->E) trellis_set(tr, STATE_E, 2 * k + 1);
+
+  if (STEP_EJ & px->J) trellis_set(tr, STATE_J, 0 + step_data(px->J) - 1);
+  if (STEP_JJ & px->J) trellis_set(tr, STATE_J, 5 + step_data(px->J) - 1);
+
+  if (STEP_EC & px->C) trellis_set(tr, STATE_C, 0 + step_data(px->C) - 1);
+  if (STEP_CC & px->C) trellis_set(tr, STATE_C, 5 + step_data(px->C) - 1);
+
+  if (STEP_ET & px->T) trellis_set(tr, STATE_T, 0 + step_data(px->T));
+  if (STEP_CT & px->T) trellis_set(tr, STATE_T, 1 + step_data(px->T));
+
+  trellis_next_xnode(tr);
+}
+
+// Cost of a length-L sequence under the single-state null model.
+// Each step adds the R->R cost RR plus the null emission cost of a window of
+// length t (1 .. VITERBI_TIME_FRAME - 1) and keeps the minimum with fminf().
+// fn(l - t, t, arg) supplies the emission code of that window.
+// Returns R[0]; for L == 0 that is the initial value -RR.
+float vit_null(struct vit *x, int L, viterbi_code_fn fn, void *arg)
+{
+  float RR = x->extr_trans.RR;
+  struct emission const em = x->emission;
+  float R[VITERBI_TIME_FRAME];
+  for (int i = 0; i < VITERBI_TIME_FRAME; ++i)
+    R[i] = INFINITY;
+  // Start at -RR so the RR added on the very first step cancels out.
+  R[0] = -RR;
+  for (int l = 1; l <= L; ++l)
+  {
+    R[imin(VITERBI_TIME_FRAME - 1, l)] = INFINITY;
+    for (int t = imin(VITERBI_TIME_FRAME - 1, l); t > 0; --t)
+    {
+      int code = fn(l - t, t, arg);
+      f32 nil = em.null[code];
+      int a = t - 0;
+      int z = t - 1;
+      // Same slot exchange as in cost(): the new value lands at z and the old
+      // z value moves to a.
+      float tmp = fminf(R[a], R[z] + RR + nil);
+      R[a] = R[z];
+      R[z] = tmp;
+    }
+  }
+  return R[0];
+}
+
+// Cost of the best path only; no trellis is written (path = 0).
+f32 vit_cost(struct vit *x, int L, viterbi_code_fn fn, void *arg)
+{
+  return cost(x, L, 0, fn, arg);
+}
+
+// Size the trellis for (K, L) and rerun the dynamic program with path
+// recording on. Returns the trellis_setup() error code, or 0 on success; the
+// cost itself is discarded.
+int vit_path(struct vit *x, int L, viterbi_code_fn fn, void *arg)
+{
+  int rc = trellis_setup(&x->trellis, x->K, L);
+  if (rc) return rc;
+  cost(x, L, 1, fn, arg);
+  return 0;
+}
+
+// Borrowed pointer to the trellis embedded in x.
+struct trellis *vit_trellis(struct vit *x)
+{
+  return &x->trellis;
+}
+
+// Debug helper: print on one line which predecessor step is recorded in
+// x->prev_extr_state / x->prev_core_state for every state, followed by the
+// row number l and K. A state with no recognised step bit prints as "?X=? ".
+// Nothing calls it by default, hence the unused attribute.
+__attribute__((unused)) static void dump_trellis(struct vit const *x, int l)
+{
+  struct prev_extr_state const *px = &x->prev_extr_state;
+  struct prev_core_state const *pc = x->prev_core_state;
+
+  if (px->N & STEP_SN) printf("SN=%d ", step_data(px->N));
+  else if (px->N & STEP_NN) printf("NN=%d ", step_data(px->N));
+  else printf("?N=? ");
+
+  for (int k = 0; k < x->K; ++k)
+  {
+// CORE reads the record of state `name` in the given pack and lane; DATA
+// extracts the data part of that same record, so it must forward `name`
+// and `lane` rather than always reading M.
+#define CORE(q, name, lane) getu(pc[q].name, lane)
+#define DATA(q, name, lane) step_data(CORE(q, name, lane))
+    int q = core_pack(k, x->Q);
+    int e = core_lane(k, x->Q);
+    if (CORE(q, M, e) & STEP_BM) printf("BM=%d ", DATA(q, M, e));
+    else if (CORE(q, M, e) & STEP_MM) printf("MM=%d ", DATA(q, M, e));
+    else if (CORE(q, M, e) & STEP_IM) printf("IM=%d ", DATA(q, M, e));
+    else if (CORE(q, M, e) & STEP_DM) printf("DM=%d ", DATA(q, M, e));
+    else printf("?M=? ");
+
+    if (CORE(q, I, e) & STEP_MI) printf("MI=%d ", DATA(q, I, e));
+    else if (CORE(q, I, e) & STEP_II) printf("II=%d ", DATA(q, I, e));
+    else printf("?I=? ");
+
+    if (CORE(q, D, e) & STEP_MD) printf("MD=%d ", DATA(q, D, e));
+    else if (CORE(q, D, e) & STEP_DD) printf("DD=%d ", DATA(q, D, e));
+    else printf("?D=? ");
+#undef DATA
+#undef CORE
+  }
+
+  if (px->B & STEP_SB) printf("SB=%d ", step_data(px->B));
+  else if (px->B & STEP_NB) printf("NB=%d ", step_data(px->B));
+  else if (px->B & STEP_EB) printf("EB=%d ", step_data(px->B));
+  else if (px->B & STEP_JB) printf("JB=%d ", step_data(px->B));
+  else printf("?B=? ");
+
+  {
+    // Node index rebuilt from the pack index and lane stored in the E record.
+    int q = step_data(px->E);
+    int e = step_lane(px->E);
+    int k = e * x->Q + q;
+    if (px->E & STEP_ME) printf("ME=%d ", k);
+    else if (px->E & STEP_DE) printf("DE=%d ", k);
+    else printf("?E=? ");
+  }
+
+  if (px->J & STEP_EJ) printf("EJ=%d ", step_data(px->J));
+  else if (px->J & STEP_JJ) printf("JJ=%d ", step_data(px->J));
+  else printf("?J=? ");
+
+  if (px->C & STEP_EC) printf("EC=%d ", step_data(px->C));
+  else if (px->C & STEP_CC) printf("CC=%d ", step_data(px->C));
+  else printf("?C=? ");
+
+  if (px->T & STEP_ET) printf("ET=%d ", step_data(px->T));
+  else if (px->T & STEP_CT) printf("CT=%d ", step_data(px->T));
+  else printf("?T=? ");
+  printf("l=%d K=%d\n", l, x->K);
+}
diff --git a/c-core/vit.h b/c-core/vit.h
new file mode 100644
index 0000000..ec28e93
--- /dev/null
+++ b/c-core/vit.h
@@ -0,0 +1,25 @@
+#ifndef VIT_H
+#define VIT_H
+
+#include "vith.h"
+
+struct trellis;
+
+struct vit;
+
+struct vit *vit_new(void);
+void vit_del(struct vit const *);
+
+int vit_setup(struct vit *, int K);
+void vit_set_extr_trans(struct vit *, enum extr_trans_id, float scalar);
+void vit_set_core_trans(struct vit *, enum core_trans_id, float scalar, int k);
+void vit_set_null(struct vit *, float scalar, int code);
+void vit_set_background(struct vit *, float scalar, int code);
+void vit_set_match(struct vit *, float scalar, int k, int code);
+float vit_null(struct vit *, int L, viterbi_code_fn, void *);
+float vit_cost(struct vit *, int L, viterbi_code_fn, void *);
+int vit_path(struct vit *, int L, viterbi_code_fn, void *);
+
+struct trellis *vit_trellis(struct vit *);
+
+#endif
diff --git a/c-core/viterbi.c b/c-core/viterbi.c
deleted file mode 100644
index 7c898ed..0000000
--- a/c-core/viterbi.c
+++ /dev/null
@@ -1,303 +0,0 @@
-#include "tictoc.h"
-#include "imm/lprob.h"
-#include "vitfast.h"
-#include "imm/path.h"
-#include "protein.h"
-#include "protein_node.h"
-#include "viterbi_dp.h"
-#include "viterbi_index.h"
-#include "viterbi_onto.h"
-#include "viterbi_path.h"
-#include "viterbi_struct.h"
-#include "viterbi_table.h"
-#include "viterbi_xtrans.h"
-#include
-#include
-
-// Let m be the core size.
-// We evaluate the HMM in the following order:
-//
-// -> S
-// (S, N) -> N
-// (S, N) -> B' (it will be adjusted later on)
-//
-// B0 -> M0
-// M0 -> E' (it will be adjusted later on)
-// For each k in 0, 1, m-1:
-// Let n = k + 1.
-// (Mk, Ik) -> Ik -// (B', Mk, Ik, Dk) -> Mn -// (Mk, Dk) -> Dn -// (E', Mn, Dn) -> E' -// -// E' -> E -// (E , J) -> J -// (B', E, J) -> B -// (E , C) -> C -// (E , C) -> T - -void viterbi_init(struct viterbi *x) -{ - x->protein = NULL; - coredp_init(&x->dp); - trellis_init(&x->trellis); -} - -int viterbi_setup(struct viterbi *x, struct protein const *protein, - struct imm_eseq const *eseq) -{ - x->protein = protein; - x->seq = eseq; - - dp_fill(x->S, IMM_LPROB_ZERO); - dp_fill(x->N, IMM_LPROB_ZERO); - dp_fill(x->B, IMM_LPROB_ZERO); - dp_fill(x->J, IMM_LPROB_ZERO); - dp_fill(x->E, IMM_LPROB_ZERO); - dp_fill(x->C, IMM_LPROB_ZERO); - dp_fill(x->T, IMM_LPROB_ZERO); - - return coredp_setup(&x->dp, x->protein->core_size); -} - -void viterbi_cleanup(struct viterbi *x) -{ - trellis_cleanup(&x->trellis); - coredp_cleanup(&x->dp); -} - -float viterbi_null_loglik(struct viterbi *x) -{ - int seq_size = imm_eseq_size(x->seq); - - dp_fill(x->S, IMM_LPROB_ZERO); - dp_fill(x->R, IMM_LPROB_ZERO); - dp_set(x->S, 0, 0); - - DECLARE_INDEX(ix) = {0}; - DECLARE_TABLE(null) = {0}; - - for (int r = 0; r < seq_size + 1; ++r) - { - index_setup(ix, x->seq, r, false); - table_setup(null, x->protein->null.emission, ix, false); - - dp_set(x->R, 0, onto_R(x->S, x->R, x->protein->null.RR, null)); - dp_advance(x->S); - dp_advance(x->R); - dp_set(x->S, 0, IMM_LPROB_ZERO); - } - return dp_get(x->R, 1); -} - -INLINE void alternative(struct viterbi *x, int row_start, int row_end, - bool const safe, struct trellis *tr) -{ - int core_size = x->protein->core_size; - - struct viterbi_xtrans const xt = viterbi_xtrans_init(x->protein->xtrans); - - DECLARE_INDEX(ix) = {0}; - DECLARE_TABLE(null) = {0}; - DECLARE_TABLE(bg) = {0}; - DECLARE_TABLE(match) = {0}; - - if (tr) trellis_seek_xnode(tr, row_start); - if (tr) trellis_seek_node(tr, row_start, 0); - for (int r = row_start; r < row_end; ++r) - { - if (tr) trellis_clear_xnode(tr); - index_setup(ix, x->seq, r, safe); - table_setup(null, 
x->protein->null.emission, ix, safe); - table_setup(bg, x->protein->bg.emission, ix, safe); - - dp_set(x->N, 0, onto_N(tr, x->S, x->N, xt.SN, xt.NN, null)); - dp_set(x->B, 0, onto_B(tr, x->S, x->N, xt.SB, xt.NB)); - - dp_advance(x->S); - dp_advance(x->N); - - float *Mk = coredp_rewind(x->dp, STATE_M); - float *Ik = coredp_rewind(x->dp, STATE_I); - float *Dk = coredp_rewind(x->dp, STATE_D); - table_setup(match, x->protein->nodes[0].emission, ix, safe); - - if (tr) trellis_clear_node(tr); - // BM(0) -> M(0) - dp_set(Mk, 0, onto_M0(tr, x->B, x->protein->BMk[0], match)); - // M(0) -> E - float Emax = dp_get(Mk, 0) + xt.ME + 0; - // Skip transition into D0 state (does not exist) - - for (int k = 0; k + 1 < core_size; ++k) - { - int n = k + 1; - float const MM = x->protein->nodes[k].trans.MM; - float const MI = x->protein->nodes[k].trans.MI; - float const MD = x->protein->nodes[k].trans.MD; - float const IM = x->protein->nodes[k].trans.IM; - float const II = x->protein->nodes[k].trans.II; - float const DM = x->protein->nodes[k].trans.DM; - float const DD = x->protein->nodes[k].trans.DD; - float const BM = x->protein->BMk[n]; - table_setup(match, x->protein->nodes[n].emission, ix, safe); - - // [M(k), I(k)] -> I(k) - dp_set(Ik, 0, onto_I(tr, Mk, Ik, MI, II, bg)); - if (tr) trellis_next_node(tr); - if (tr) trellis_clear_node(tr); - - table_prefetch(x->protein->nodes[n + 1].emission, ix); - // [BM(n), M(k), I(k), D(k)] -> M(n) - float Mn = onto_M(tr, x->B, Mk, Ik, Dk, BM, MM, IM, DM, match); - - // [M(k), D(k)] -> D(n) - float Dn = onto_D(tr, Mk, Dk, MD, DD); - - dp_advance(Mk); - dp_advance(Ik); - dp_advance(Dk); - - Mk = coredp_next(Mk); - dp_set(Mk, 0, Mn); - - Ik = coredp_next(Ik); - Dk = coredp_next(Dk); - dp_set(Dk, 0, Dn); - - Emax = maximum(Emax, Mn + xt.ME + 0); - Emax = maximum(Emax, Dn + xt.DE + 0); - } - // Skip transition into Ik1 state (does not exist) - if (tr) trellis_next_node(tr); - dp_advance(Mk); - dp_advance(Ik); - dp_advance(Dk); - - if (tr) - 
dp_set(x->E, 0, onto_E(tr, x->dp, xt.ME, xt.DE, core_size)); - else - dp_set(x->E, 0, Emax); - - dp_set(x->J, 0, onto_J(tr, x->E, x->J, xt.EJ, xt.JJ, null)); - - dp_set(x->B, 0, adjust_onto_B(tr, x->B, x->E, x->J, xt.EB, xt.JB)); - - dp_advance(x->B); - dp_advance(x->J); - - dp_set(x->C, 0, onto_C(tr, x->E, x->C, xt.EC, xt.CC, null)); - dp_set(x->T, 0, onto_T(tr, x->E, x->C, xt.ET, xt.CT)); - - dp_advance(x->E); - dp_advance(x->C); - dp_advance(x->T); - - dp_set(x->S, 0, IMM_LPROB_ZERO); - if (tr) trellis_next_xnode(tr); - } -} - -static inline int row_mid(int end) -{ - return end < (DCP_PAST_SIZE - 1) ? end : (DCP_PAST_SIZE - 1); -} - -static int code_fn(int pos, int len, void *arg) -{ - struct imm_eseq const *seq = arg; - return imm_eseq_get(seq, pos, len, 1); -} - -float viterbi_alt_loglik(struct viterbi *x) -{ - assert(imm_eseq_size(x->seq) < INT_MAX); - int seq_size = imm_eseq_size(x->seq); - int end = seq_size + 1; - - dp_set(x->S, 0, 0); - tic(); - alternative(x, 0, row_mid(end), false, NULL); - alternative(x, row_mid(end), end, true, NULL); - toc("slow"); - float slow = dp_get(x->T, 1); - - struct vitfast *vit = vitfast_new(); - int K = x->protein->core_size; - vitfast_setup(vit, K); - - struct viterbi_xtrans const xt = viterbi_xtrans_init(x->protein->xtrans); - vitfast_set_extr_trans(vit, EXTR_TRANS_SN, -xt.SN); - vitfast_set_extr_trans(vit, EXTR_TRANS_NN, -xt.NN); - vitfast_set_extr_trans(vit, EXTR_TRANS_SB, -xt.SB); - vitfast_set_extr_trans(vit, EXTR_TRANS_NB, -xt.NB); - vitfast_set_extr_trans(vit, EXTR_TRANS_EB, -xt.EB); - vitfast_set_extr_trans(vit, EXTR_TRANS_JB, -xt.JB); - vitfast_set_extr_trans(vit, EXTR_TRANS_EJ, -xt.EJ); - vitfast_set_extr_trans(vit, EXTR_TRANS_JJ, -xt.JJ); - vitfast_set_extr_trans(vit, EXTR_TRANS_EC, -xt.EC); - vitfast_set_extr_trans(vit, EXTR_TRANS_CC, -xt.CC); - vitfast_set_extr_trans(vit, EXTR_TRANS_ET, -xt.ET); - vitfast_set_extr_trans(vit, EXTR_TRANS_CT, -xt.CT); - - for (int k = 0; k < K; ++k) - { - struct protein const *p 
= x->protein; - vitfast_set_core_trans(vit, CORE_TRANS_BM, -p->BMk[k], k); - } - - vitfast_set_core_trans(vit, CORE_TRANS_MM, INFINITY, 0); - vitfast_set_core_trans(vit, CORE_TRANS_MD, INFINITY, 0); - vitfast_set_core_trans(vit, CORE_TRANS_IM, INFINITY, 0); - vitfast_set_core_trans(vit, CORE_TRANS_DM, INFINITY, 0); - vitfast_set_core_trans(vit, CORE_TRANS_DD, INFINITY, 0); - for (int k = 0; k < K - 1; ++k) - { - struct protein const *p = x->protein; - vitfast_set_core_trans(vit, CORE_TRANS_MM, -p->nodes[k].trans.MM, k + 1); - vitfast_set_core_trans(vit, CORE_TRANS_MI, -p->nodes[k].trans.MI, k + 0); - vitfast_set_core_trans(vit, CORE_TRANS_MD, -p->nodes[k].trans.MD, k + 1); - vitfast_set_core_trans(vit, CORE_TRANS_IM, -p->nodes[k].trans.IM, k + 1); - vitfast_set_core_trans(vit, CORE_TRANS_II, -p->nodes[k].trans.II, k + 0); - vitfast_set_core_trans(vit, CORE_TRANS_DM, -p->nodes[k].trans.DM, k + 1); - vitfast_set_core_trans(vit, CORE_TRANS_DD, -p->nodes[k].trans.DD, k + 1); - } - vitfast_set_core_trans(vit, CORE_TRANS_MI, INFINITY, K - 1); - vitfast_set_core_trans(vit, CORE_TRANS_II, INFINITY, K - 1); - - for (size_t i = 0; i < VITFAST_TABLE_SIZE; ++i) - { - vitfast_set_null(vit, -x->protein->null.emission[i], i); - vitfast_set_background(vit, -x->protein->bg.emission[i], i); - - for (int k = 0; k < K; ++k) - vitfast_set_match(vit, -x->protein->nodes[k].emission[i], k, i); - } - - tic(); - float fast = -vitfast_cost(vit, seq_size, code_fn, (void *)x->seq); - toc("fast"); - if (fabsf(slow - fast) > 1e-7) {printf("%g %g: %g\n", slow, fast, slow-fast);exit(1);} - - vitfast_del(vit); - - return dp_get(x->T, 1); -} - -int viterbi_alt_path(struct viterbi *x, struct imm_path *path, float *loglik) -{ - assert(imm_eseq_size(x->seq) < INT_MAX); - int seq_size = imm_eseq_size(x->seq); - int end = seq_size + 1; - - int rc = trellis_setup(&x->trellis, x->protein->core_size, seq_size); - if (rc) return rc; - - dp_set(x->S, 0, 0); - alternative(x, 0, row_mid(end), false, 
&x->trellis); - alternative(x, row_mid(end), end, true, &x->trellis); - if (loglik) *loglik = dp_get(x->T, 1); - - imm_path_reset(path); - return unzip_path(&x->trellis, seq_size, path); -} diff --git a/c-core/viterbi.h b/c-core/viterbi.h deleted file mode 100644 index a163579..0000000 --- a/c-core/viterbi.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef VITERBI_H -#define VITERBI_H - -#include - -struct imm_eseq; -struct protein; -struct viterbi; -struct imm_path; - -// clang-format off -void viterbi_init(struct viterbi *); -int viterbi_setup(struct viterbi *, struct protein const *, struct imm_eseq const *); -void viterbi_cleanup(struct viterbi *); -float viterbi_null_loglik(struct viterbi *); -float viterbi_alt_loglik(struct viterbi *); -int viterbi_alt_path(struct viterbi *, struct imm_path *, float *loglik); -// clang-format on - -#endif diff --git a/c-core/viterbi_coredp.c b/c-core/viterbi_coredp.c deleted file mode 100644 index 1c290a9..0000000 --- a/c-core/viterbi_coredp.c +++ /dev/null @@ -1,28 +0,0 @@ -#include "viterbi_coredp.h" -#include "error.h" -#include "rc.h" -#include "xrealloc.h" -#include -#include -#include - -void coredp_init(float **x) { *x = NULL; } - -int coredp_setup(float **x, int core_size) -{ - size_t size = 3 * DCP_PAST_SIZE * core_size; - - *x = xrealloc(*x, sizeof(float) * size); - if (!*x && size > 0) return error(DCP_ENOMEM); - - for (size_t i = 0; i < size; ++i) - (*x)[i] = -INFINITY; - - return 0; -} - -void coredp_cleanup(float **x) -{ - free(*x); - *x = NULL; -} diff --git a/c-core/viterbi_coredp.h b/c-core/viterbi_coredp.h deleted file mode 100644 index df42339..0000000 --- a/c-core/viterbi_coredp.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef VITERBI_COREDP_H -#define VITERBI_COREDP_H - -#include "compiler.h" -#include "state.h" -#include "xlimits.h" -#include - -#define DECLARE_COREDP(name) float *name - -// clang-format off -void coredp_init(float **x); -int coredp_setup(float **, int core_size); -void coredp_cleanup(float **); -CONST 
float *coredp_next(float *x) { return x + 3 * DCP_PAST_SIZE; } -// clang-format on - -CONST float *coredp_rewind(float *x, int state) -{ - if (state == STATE_M) return x + 0 * DCP_PAST_SIZE; - if (state == STATE_I) return x + 1 * DCP_PAST_SIZE; - if (state == STATE_D) return x + 2 * DCP_PAST_SIZE; - UNREACHABLE(); - return NULL; -} - -#endif diff --git a/c-core/viterbi_dp.h b/c-core/viterbi_dp.h deleted file mode 100644 index 929cb15..0000000 --- a/c-core/viterbi_dp.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef VITERBI_DP_H -#define VITERBI_DP_H - -#include "compiler.h" -#include "xlimits.h" -#include - -#define DECLARE_DP(name) float name[DCP_PAST_SIZE] ALIGNED - -INLINE void dp_fill(float *x, float value) -{ - for (int i = 0; i < DCP_PAST_SIZE; ++i) - x[i] = value; -} - -CONST float dp_get(float const x[restrict], int look_back) -{ - return x[look_back]; -} - -INLINE void dp_set(float x[restrict], int look_back, float value) -{ - x[look_back] = value; -} - -INLINE void dp_advance(float x[]) -{ - memmove(&x[1], &x[0], sizeof(float) * (DCP_PAST_SIZE - 1)); -} - -#endif diff --git a/c-core/viterbi_dump.c b/c-core/viterbi_dump.c deleted file mode 100644 index 9876520..0000000 --- a/c-core/viterbi_dump.c +++ /dev/null @@ -1,185 +0,0 @@ -#include "viterbi_dump.h" -#include "array_size_field.h" -#include "imm/dump.h" -#include "imm/fmt.h" -#include "imm/lprob.h" -#include "protein.h" -#include "protein_node.h" -#include "viterbi_xtrans.h" - -void viterbi_dump(struct protein *x, FILE *fp) -{ - int core_size = x->core_size; - - float const mute_emission = IMM_LPROB_ONE; - float const emis_B = mute_emission; - float const emis_D = mute_emission; - float const emis_E = mute_emission; - float const emis_T = mute_emission; - - char const *f32f = imm_fmt_get_f32(); - - fprintf(fp, "B: "); - fprintf(fp, f32f, emis_B); - fputc('\n', fp); - - fprintf(fp, "D: "); - fprintf(fp, f32f, emis_D); - fputc('\n', fp); - - fprintf(fp, "E: "); - fprintf(fp, f32f, emis_E); - fputc('\n', fp); - - 
fprintf(fp, "T: "); - fprintf(fp, f32f, emis_T); - fputc('\n', fp); - - float const *restrict null_emission = x->null.emission; - float const *restrict background_emission = x->bg.emission; - float const *restrict emis_I = background_emission; - float const *restrict emis_N = null_emission; - float const *restrict emis_J = null_emission; - float const *restrict emis_C = null_emission; - size_t bg_size = array_size_field(struct protein_background, emission); - - fprintf(fp, "I*: "); - imm_dump_array_f32(bg_size, emis_I, fp); - fputc('\n', fp); - - fprintf(fp, "N: ["); - imm_dump_array_f32(bg_size, emis_N, fp); - fprintf(fp, "]\n"); - - fprintf(fp, "J: ["); - imm_dump_array_f32(bg_size, emis_J, fp); - fprintf(fp, "]\n"); - - fprintf(fp, "C: ["); - imm_dump_array_f32(bg_size, emis_C, fp); - fprintf(fp, "]\n"); - - for (int k = 0; k < core_size; ++k) - { - float const *match_emission = x->nodes[k].emission; - - fprintf(fp, "M%d: ", k + 1); - size_t n = PROTEIN_NODE_SIZE; - imm_dump_array_f32(n, match_emission, fp); - fputc('\n', fp); - } -} - -void viterbi_dump_dot(struct protein *x, FILE *fp) -{ - char const *f32f = imm_fmt_get_f32(); - - struct viterbi_xtrans const xtrans = viterbi_xtrans_init(x->xtrans); - - fprintf(fp, "S -> B [label="); - fprintf(fp, f32f, xtrans.SB); - fprintf(fp, "];\n"); - - fprintf(fp, "S -> N [label="); - fprintf(fp, f32f, xtrans.SN); - fprintf(fp, "];\n"); - - fprintf(fp, "N -> N [label="); - fprintf(fp, f32f, xtrans.NN); - fprintf(fp, "];\n"); - - fprintf(fp, "N -> B [label="); - fprintf(fp, f32f, xtrans.NB); - fprintf(fp, "];\n"); - - fprintf(fp, "E -> T [label="); - fprintf(fp, f32f, xtrans.ET); - fprintf(fp, "];\n"); - - fprintf(fp, "E -> C [label="); - fprintf(fp, f32f, xtrans.EC); - fprintf(fp, "];\n"); - - fprintf(fp, "C -> C [label="); - fprintf(fp, f32f, xtrans.CC); - fprintf(fp, "];\n"); - - fprintf(fp, "C -> T [label="); - fprintf(fp, f32f, xtrans.CT); - fprintf(fp, "];\n"); - - fprintf(fp, "E -> B [label="); - fprintf(fp, f32f, 
xtrans.EB); - fprintf(fp, "];\n"); - - fprintf(fp, "E -> J [label="); - fprintf(fp, f32f, xtrans.EJ); - fprintf(fp, "];\n"); - - fprintf(fp, "J -> J [label="); - fprintf(fp, f32f, xtrans.JJ); - fprintf(fp, "];\n"); - - fprintf(fp, "J -> B [label="); - fprintf(fp, f32f, xtrans.JB); - fprintf(fp, "];\n"); - - int core_size = x->core_size; - for (int k = 0; k + 1 < core_size; ++k) - { - struct trans const *restrict trans = &x->nodes[k].trans; - int i0 = k + 1; - int i1 = k + 2; - fprintf(fp, "D%d -> D%d [label=", i0, i1); - fprintf(fp, f32f, trans->DD); - fprintf(fp, "];\n"); - - fprintf(fp, "D%d -> M%d [label=", i0, i1); - fprintf(fp, f32f, trans->DM); - fprintf(fp, "];\n"); - - fprintf(fp, "I%d -> I%d [label=", i0, i0); - fprintf(fp, f32f, trans->II); - fprintf(fp, "];\n"); - - fprintf(fp, "I%d -> M%d [label=", i0, i1); - fprintf(fp, f32f, trans->IM); - fprintf(fp, "];\n"); - - fprintf(fp, "M%d -> I%d [label=", i0, i0); - fprintf(fp, f32f, trans->MI); - fprintf(fp, "];\n"); - - fprintf(fp, "M%d -> M%d [label=", i0, i1); - fprintf(fp, f32f, trans->MM); - fprintf(fp, "];\n"); - - fprintf(fp, "M%d -> D%d [label=", i0, i1); - fprintf(fp, f32f, trans->MD); - fprintf(fp, "];\n"); - - fprintf(fp, "M%d -> E [label=", i0); - fprintf(fp, f32f, xtrans.ME); - fprintf(fp, "];\n"); - - fprintf(fp, "D%d -> E [label=", i0); - fprintf(fp, f32f, xtrans.DE); - fprintf(fp, "];\n"); - } - - fprintf(fp, "M%d -> E [label=", core_size); - fprintf(fp, f32f, xtrans.ME); - fprintf(fp, "];\n"); - - fprintf(fp, "D%d -> E [label=", core_size); - fprintf(fp, f32f, xtrans.DE); - fprintf(fp, "];\n"); - - float const *restrict trans_BM = x->BMk; - for (int k = 0; k < core_size; ++k) - { - fprintf(fp, "B -> M%d [label=", k + 1); - fprintf(fp, f32f, trans_BM[k]); - fprintf(fp, "];\n"); - } -} diff --git a/c-core/viterbi_dump.h b/c-core/viterbi_dump.h deleted file mode 100644 index f3c51c2..0000000 --- a/c-core/viterbi_dump.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef VITERBI_DUMP_H -#define 
VITERBI_DUMP_H - -#include - -struct protein; - -void viterbi_dump(struct protein *, FILE *); -void viterbi_dump_dot(struct protein *, FILE *); - -#endif diff --git a/c-core/viterbi_index.h b/c-core/viterbi_index.h deleted file mode 100644 index 41f29f6..0000000 --- a/c-core/viterbi_index.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef VITERBI_INDEX_H -#define VITERBI_INDEX_H - -#include "compiler.h" -#include "imm/eseq.h" -#include "xlimits.h" -#include - -#define DECLARE_INDEX(name) int name[DCP_PAST_SIZE - 1] ALIGNED - -INLINE void index_setup(int index[restrict], struct imm_eseq const *eseq, - int row, bool const safe) -{ -#pragma GCC unroll(DCP_PAST_SIZE - 1) - for (int i = 0; i < DCP_PAST_SIZE - 1; ++i) - { - int pos = row - i - 1; - int size = i + 1; - index[i] = (!safe && pos < 0) ? -1 : imm_eseq_get(eseq, pos, size, 1); - } -} - -#endif diff --git a/c-core/viterbi_onto.h b/c-core/viterbi_onto.h deleted file mode 100644 index e764670..0000000 --- a/c-core/viterbi_onto.h +++ /dev/null @@ -1,304 +0,0 @@ -#ifndef VITERBI_ONTO_H -#define VITERBI_ONTO_H - -#if __ARM_NEON -#include "argmax_neon.h" -#elif __AVX__ -#include "argmax_avx.h" -#else -#include "argmax_generic.h" -#endif - -#include "compiler.h" -#include "trellis.h" -#include "viterbi_coredp.h" -#include "viterbi_dp.h" -#include "viterbi_table.h" - -PURE float onto_R(float const S[restrict], float const R[restrict], - float const RR, float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(S, 1) + 0 + table_get(e, 1), - dp_get(S, 2) + 0 + table_get(e, 2), - dp_get(S, 3) + 0 + table_get(e, 3), - dp_get(S, 4) + 0 + table_get(e, 4), - dp_get(S, 5) + 0 + table_get(e, 5), - - dp_get(R, 1) + RR + table_get(e, 1), - dp_get(R, 2) + RR + table_get(e, 2), - dp_get(R, 3) + RR + table_get(e, 3), - dp_get(R, 4) + RR + table_get(e, 4), - dp_get(R, 5) + RR + table_get(e, 5), - }; - // clang-format on - return vmax10(x); -} - -INLINE float onto_N(struct trellis *t, float const S[restrict], - 
float const N[restrict], float const SN, float const NN, - float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(S, 1) + SN + table_get(e, 1), - dp_get(S, 2) + SN + table_get(e, 2), - dp_get(S, 3) + SN + table_get(e, 3), - dp_get(S, 4) + SN + table_get(e, 4), - dp_get(S, 5) + SN + table_get(e, 5), - - dp_get(N, 1) + NN + table_get(e, 1), - dp_get(N, 2) + NN + table_get(e, 2), - dp_get(N, 3) + NN + table_get(e, 3), - dp_get(N, 4) + NN + table_get(e, 4), - dp_get(N, 5) + NN + table_get(e, 5), - }; - if (!t) return vmax10(x); - // clang-format on - - float val; - trellis_set(t, STATE_N, argmax10(&val, x)); - return val; -} - -INLINE float onto_B(struct trellis *t, float const S[restrict], - float const N[restrict], float const SB, float const NB) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(S, 0) + SB + 0, - dp_get(N, 0) + NB + 0, - }; - if (!t) return maximum(x[0], x[1]); - // clang-format on - - float val; - trellis_set(t, STATE_B, argmax2(&val, x)); - return val; -} - -INLINE float adjust_onto_B(struct trellis *t, float const B[restrict], - float const E[restrict], float const J[restrict], - float const EB, float const JB) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(B, 0), - dp_get(E, 0) + EB + 0, - dp_get(J, 0) + JB + 0, - }; - if (!t) return vmax3(x); - - int const src[] = { - -1, - 2, - 3, - }; - // clang-format on - - float val; - int i = argmax3(&val, x); - if (i > 0) trellis_replace(t, STATE_B, src[i]); - return val; -} - -INLINE float onto_M0(struct trellis *t, float const B[restrict], float const BM, - float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(B, 1) + BM + table_get(e, 1), - dp_get(B, 2) + BM + table_get(e, 2), - dp_get(B, 3) + BM + table_get(e, 3), - dp_get(B, 4) + BM + table_get(e, 4), - dp_get(B, 5) + BM + table_get(e, 5), - }; - if (!t) return vmax5(x); - // clang-format on - - float val; - trellis_set(t, STATE_M, argmax5(&val, x)); - 
return val; -} - -INLINE float onto_I(struct trellis *t, float const M[restrict], - float const I[restrict], float const MI, float const II, - float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(M, 1) + MI + table_get(e, 1), - dp_get(M, 2) + MI + table_get(e, 2), - dp_get(M, 3) + MI + table_get(e, 3), - dp_get(M, 4) + MI + table_get(e, 4), - dp_get(M, 5) + MI + table_get(e, 5), - - dp_get(I, 1) + II + table_get(e, 1), - dp_get(I, 2) + II + table_get(e, 2), - dp_get(I, 3) + II + table_get(e, 3), - dp_get(I, 4) + II + table_get(e, 4), - dp_get(I, 5) + II + table_get(e, 5), - }; - if (!t) return vmax10(x); - // clang-format on - - float val; - trellis_set(t, STATE_I, argmax10(&val, x)); - return val; -} - -INLINE float onto_M(struct trellis *t, float const B[restrict], - float const M[restrict], float const I[restrict], - float const D[restrict], float const BM, float const MM, - float const IM, float const DM, float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(B, 1) + BM + table_get(e, 1), - dp_get(B, 2) + BM + table_get(e, 2), - dp_get(B, 3) + BM + table_get(e, 3), - dp_get(B, 4) + BM + table_get(e, 4), - dp_get(B, 5) + BM + table_get(e, 5), - - dp_get(M, 1) + MM + table_get(e, 1), - dp_get(M, 2) + MM + table_get(e, 2), - dp_get(M, 3) + MM + table_get(e, 3), - dp_get(M, 4) + MM + table_get(e, 4), - dp_get(M, 5) + MM + table_get(e, 5), - - dp_get(I, 1) + IM + table_get(e, 1), - dp_get(I, 2) + IM + table_get(e, 2), - dp_get(I, 3) + IM + table_get(e, 3), - dp_get(I, 4) + IM + table_get(e, 4), - dp_get(I, 5) + IM + table_get(e, 5), - - dp_get(D, 1) + DM + table_get(e, 1), - dp_get(D, 2) + DM + table_get(e, 2), - dp_get(D, 3) + DM + table_get(e, 3), - dp_get(D, 4) + DM + table_get(e, 4), - dp_get(D, 5) + DM + table_get(e, 5), - }; - if (!t) return vmax20(x); - // clang-format on - - float val; - trellis_set(t, STATE_M, argmax20(&val, x)); - return val; -} - -INLINE float onto_D(struct trellis *t, 
float const M[restrict], - float const D[restrict], float const MD, float const DD) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(M, 0) + MD + 0, - dp_get(D, 0) + DD + 0, - }; - if (!t) return maximum(x[0], x[1]); - // clang-format on - - float val; - trellis_set(t, STATE_D, argmax2(&val, x)); - return val; -} - -INLINE void fmax_idx(float *value, int *src, float new_value, int new_src) -{ - if (new_value > *value) - { - *value = new_value; - *src = new_src; - } -} - -INLINE float onto_E(struct trellis *t, float *restrict dp, float const ME, - float const DE, int const core_size) -{ - float *Mk = coredp_rewind(dp, STATE_M); - float *Dk = coredp_rewind(dp, STATE_D); - float x = dp_get(Mk, 1) + ME; - // int src = MIX(0); - int src = 0; - for (int i = 2; i < 2 * core_size; i += 2) - { - Mk = coredp_next(Mk); - Dk = coredp_next(Dk); - // It is look_back=1 instead of look_back=0 because I already called - // make_future(DPM) and make_future(DPD), for performance reasons. - fmax_idx(&x, &src, dp_get(Mk, 1) + ME + 0, i + 0); - fmax_idx(&x, &src, dp_get(Dk, 1) + DE + 0, i + 1); - } - trellis_set(t, STATE_E, src); - return x; -} - -INLINE float onto_J(struct trellis *t, float const E[restrict], - float const J[restrict], float const EJ, float const JJ, - float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(E, 1) + EJ + table_get(e, 1), - dp_get(E, 2) + EJ + table_get(e, 2), - dp_get(E, 3) + EJ + table_get(e, 3), - dp_get(E, 4) + EJ + table_get(e, 4), - dp_get(E, 5) + EJ + table_get(e, 5), - - dp_get(J, 1) + JJ + table_get(e, 1), - dp_get(J, 2) + JJ + table_get(e, 2), - dp_get(J, 3) + JJ + table_get(e, 3), - dp_get(J, 4) + JJ + table_get(e, 4), - dp_get(J, 5) + JJ + table_get(e, 5), - }; - if (!t) return vmax10(x); - // clang-format on - - float val; - trellis_set(t, STATE_J, argmax10(&val, x)); - return val; -} - -INLINE float onto_C(struct trellis *t, float const E[restrict], - float const C[restrict], float const EC, 
float const CC, - float const e[restrict]) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(E, 1) + EC + table_get(e, 1), - dp_get(E, 2) + EC + table_get(e, 2), - dp_get(E, 3) + EC + table_get(e, 3), - dp_get(E, 4) + EC + table_get(e, 4), - dp_get(E, 5) + EC + table_get(e, 5), - - dp_get(C, 1) + CC + table_get(e, 1), - dp_get(C, 2) + CC + table_get(e, 2), - dp_get(C, 3) + CC + table_get(e, 3), - dp_get(C, 4) + CC + table_get(e, 4), - dp_get(C, 5) + CC + table_get(e, 5), - }; - if (!t) return vmax10(x); - // clang-format on - - float val; - trellis_set(t, STATE_C, argmax10(&val, x)); - return val; -} - -INLINE float onto_T(struct trellis *t, float const E[restrict], - float const C[restrict], float const ET, float const CT) -{ - // clang-format off - float const x[] ALIGNED = { - dp_get(E, 0) + ET + 0, - dp_get(C, 0) + CT + 0, - }; - if (!t) return maximum(x[0], x[1]); - // clang-format on - - float val; - trellis_set(t, STATE_T, argmax2(&val, x)); - return val; -} - -#endif diff --git a/c-core/viterbi_path.c b/c-core/viterbi_path.c deleted file mode 100644 index 3e15120..0000000 --- a/c-core/viterbi_path.c +++ /dev/null @@ -1,30 +0,0 @@ -#include "viterbi_path.h" -#include "error.h" -#include "imm/path.h" -#include "rc.h" -#include "state.h" -#include "trellis.h" -#include - -int unzip_path(struct trellis *x, int seq_size, struct imm_path *path) -{ - int state = state_make_end(); - assert(seq_size <= INT_MAX); - int stage = seq_size; - trellis_seek_xnode(x, stage); - - while (!state_is_start(state) || stage) - { - int size = trellis_emission_size(x, state); - if (imm_path_add(path, imm_step(state, size, 0))) return error(DCP_ENOMEM); - state = trellis_previous_state(x, state); - stage -= size; - if (state_is_core(state)) - trellis_seek_node(x, stage, state_core_idx(state)); - else - trellis_seek_xnode(x, stage); - } - if (imm_path_add(path, imm_step(state, 0, 0))) return error(DCP_ENOMEM); - imm_path_reverse(path); - return 0; -} diff --git 
a/c-core/viterbi_path.h b/c-core/viterbi_path.h deleted file mode 100644 index 5b69959..0000000 --- a/c-core/viterbi_path.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef VITERBI_PATH_H -#define VITERBI_PATH_H - -struct trellis; -struct imm_path; - -int unzip_path(struct trellis *x, int seq_size, struct imm_path *path); - -#endif diff --git a/c-core/viterbi_struct.h b/c-core/viterbi_struct.h deleted file mode 100644 index ddec4b7..0000000 --- a/c-core/viterbi_struct.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef VITERBI_STRUCT_H -#define VITERBI_STRUCT_H - -#include "trellis.h" -#include "viterbi_coredp.h" -#include "viterbi_dp.h" - -struct protein; - -struct viterbi -{ - struct protein const *protein; - struct imm_eseq const *seq; - DECLARE_DP(R); - DECLARE_COREDP(dp); - DECLARE_DP(S); - DECLARE_DP(N); - DECLARE_DP(B); - DECLARE_DP(J); - DECLARE_DP(E); - DECLARE_DP(C); - DECLARE_DP(T); - struct trellis trellis; -}; - -#endif diff --git a/c-core/viterbi_table.h b/c-core/viterbi_table.h deleted file mode 100644 index 89c3e96..0000000 --- a/c-core/viterbi_table.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef VITERBI_TABLE_H -#define VITERBI_TABLE_H - -#include "compiler.h" -#include "xlimits.h" -#include -#include - -#define DECLARE_TABLE(name) float name[DCP_PAST_SIZE - 1] ALIGNED - -INLINE void table_setup(float x[restrict], float const emission[restrict], - int const index[restrict], bool const safe) -{ -#pragma GCC unroll(DCP_PAST_SIZE - 1) - for (int i = 0; i < DCP_PAST_SIZE - 1; ++i) - x[i] = (!safe && index[i] < 0) ? 
-INFINITY : emission[index[i]]; -} - -INLINE void table_prefetch(float const emission[restrict], - int const index[restrict]) -{ -#pragma GCC unroll(DCP_PAST_SIZE - 1) - for (int i = 0; i < DCP_PAST_SIZE - 1; ++i) - PREFETCH(emission + index[i], 0, 1); -} - -CONST float table_get(float const emission[restrict], int num_chars) -{ - return emission[num_chars - 1]; -} - -#endif diff --git a/c-core/viterbi_xtrans.h b/c-core/viterbi_xtrans.h deleted file mode 100644 index 5670830..0000000 --- a/c-core/viterbi_xtrans.h +++ /dev/null @@ -1,51 +0,0 @@ -#ifndef VITERBI_XTRANS_H -#define VITERBI_XTRANS_H - -#include "compiler.h" -#include "xtrans.h" - -struct viterbi_xtrans -{ - float const SB; - float const SN; - float const NN; - float const NB; - - float const ET; - float const EC; - float const CC; - float const CT; - - float const EB; - float const EJ; - float const JJ; - float const JB; - - float const ME; - float const DE; -}; - -CONST struct viterbi_xtrans viterbi_xtrans_init(struct xtrans x) -{ - return (struct viterbi_xtrans){ - .SB = x.NB, - .SN = x.NN, - .NN = x.NN, - .NB = x.NB, - - .ET = x.EC + x.CT, - .EC = x.EC + x.CC, - .CC = x.CC, - .CT = x.CT, - - .EB = x.EJ + x.JB, - .EJ = x.EJ + x.JJ, - .JJ = x.JJ, - .JB = x.JB, - - .ME = 0.0f, - .DE = 0.0f, - }; -} - -#endif diff --git a/c-core/vitfast.c b/c-core/vitfast.c deleted file mode 100644 index 7be72ae..0000000 --- a/c-core/vitfast.c +++ /dev/null @@ -1,502 +0,0 @@ -#include "vitfast.h" -#include -#include - -#if !defined(__AVX__) && !defined(__ARM_NEON) - #error "We require either AVX or NEON feature." 
-#endif - -#if __ARM_NEON - #include - typedef float32x4_t pack; - #define NUM_LANES 4 - #define ALIGNMENT 16 -#endif - -#if __AVX__ - #include - typedef __m256 pack; - #define NUM_LANES 8 - #define ALIGNMENT 32 -#endif - -struct emission -{ - float null[VITFAST_TABLE_SIZE]; - pack background[VITFAST_TABLE_SIZE]; - pack *match; -}; - -struct extr_trans -{ - float SN; - float NN; - - float SB; - float NB; - float EB; - float JB; - - float EJ; - float JJ; - - float EC; - float CC; - - float ET; - float CT; -}; - -struct core_trans -{ - pack BM; - pack MM; - pack MI; - pack MD; - pack IM; - pack II; - pack DM; - pack DD; -}; - -struct extr_state -{ - // Prefix - float S; - float N; - float B; - - // Infix - float J; - - // Suffix - float E; - float C; - float T; -}; - -struct core_state -{ - pack M; - pack D; - pack I; -}; - -struct vitfast -{ - int Q; - int maxQ; - struct extr_state extr_state[VITFAST_TIME_FRAME]; - struct core_state *core_state; - - struct emission emission; - - struct extr_trans extr_trans; - struct core_trans *core_trans; -}; - -struct vitfast *vitfast_new(void) -{ - struct vitfast *x = aligned_alloc(ALIGNMENT, sizeof(struct vitfast)); - if (!x) return x; - - x->Q = x->maxQ = 0; - x->core_state = NULL; - x->core_trans = NULL; - x->emission.match = NULL; - - return x; -} - -#define INLINE static inline __attribute__((always_inline)) - -static inline void extr_trans_init(struct extr_trans *); -static inline void core_trans_init(struct core_trans *); -static inline void extr_state_init(struct extr_state *, int t); -static inline void core_state_init(struct core_state *, int t, int q, int Q); -static inline void emission_init(struct emission *, int Q); - -int vitfast_setup(struct vitfast *x, int K) -{ - int Q = x->Q = vitfast_num_packs(K); - - for (int t = 0; t < VITFAST_TIME_FRAME; ++t) - extr_state_init(x->extr_state, t); - - if (Q > x->maxQ) - { - free(x->core_state); - x->core_state = - aligned_alloc(ALIGNMENT, sizeof(struct 
core_state[VITFAST_TIME_FRAME][Q])); - if (!x->core_state) return 1; - - free(x->emission.match); - x->emission.match = aligned_alloc(ALIGNMENT, sizeof(pack[VITFAST_TABLE_SIZE][Q])); - if (!x->emission.match) return 1; - - free(x->core_trans); - x->core_trans = aligned_alloc(ALIGNMENT, sizeof(struct core_trans[Q])); - if (!x->core_trans) return 1; - - x->maxQ = Q; - } - - extr_trans_init(&x->extr_trans); - for (int q = 0; q < Q; ++q) - core_trans_init(&x->core_trans[q]); - - for (int q = 0; q < Q; ++q) - { - for (int t = 0; t < VITFAST_TIME_FRAME; ++t) - core_state_init(x->core_state, t, q, Q); - } - - emission_init(&x->emission, Q); - - return 0; -} - -void vitfast_del(struct vitfast const *x) -{ - if (x) - { - free(x->core_state); - free(x->core_trans); - free(x->emission.match); - free((void *)x); - } -} - -#if __ARM_NEON -INLINE pack min(pack a, pack b) { return vminq_f32(a, b); } -INLINE pack add(pack a, pack b) { return vaddq_f32(a, b); } -INLINE pack dup(float x) { return vdupq_n_f32(x); } -INLINE pack shift(pack x) { return vextq_f32(dup(INFINITY), x, NUM_LANES - 1); } -INLINE float hmin(pack x) { return vminvq_f32(x); } -#endif - -#if __AVX__ -INLINE pack min(pack a, pack b) { return _mm256_min_ps(a, b); } -INLINE pack add(pack a, pack b) { return _mm256_add_ps(a, b); } -INLINE pack dup(float x) { return _mm256_set1_ps(x); } - -INLINE pack shift(pack x) -{ - x = _mm256_permutevar8x32_ps(x, _mm256_set_epi32(6, 5, 4, 3, 2, 1, 0, 7)); - return _mm256_blend_ps(x, dup(INFINITY), _MM_SHUFFLE(0, 0, 0, 1)); -} - -INLINE float hmin(pack x) -{ - x = _mm256_min_ps(x, _mm256_permute_ps(x, _MM_SHUFFLE(2, 3, 0, 1))); - x = _mm256_min_ps(x, _mm256_permute_ps(x, _MM_SHUFFLE(1, 0, 3, 2))); - x = _mm256_min_ps(x, _mm256_permute2f128_ps(x, x, _MM_SHUFFLE(0, 0, 0, 1))); - return _mm_cvtss_f32(_mm256_castps256_ps128(x)); -} -#endif - -INLINE int time_map(int q, int t, int Q) { return t * Q + q; } -INLINE int imin(int a, int b) { return a < b ? 
a : b; } -INLINE pack sum(pack a, pack b, pack c) { return add(add(a, b), c); } -INLINE int core_pack(int k, int Q) { return k % Q; } -INLINE int core_lane(int k, int Q) { return k / Q; } -INLINE int all_leq(pack a, pack b); -INLINE void set(pack *, float scalar, int e); - -float vitfast_cost(struct vitfast *x, int L, vitfast_code_fn code_fn, - void *code_arg) -{ - struct emission const em = x->emission; - - struct extr_trans const xt = x->extr_trans; - struct core_trans const *ct = x->core_trans; - - struct extr_state *xs = x->extr_state; - struct core_state *cs = x->core_state; - -#define xM(q, t) cs[time_map(q, t, Q)].M -#define xD(q, t) cs[time_map(q, t, Q)].D -#define xI(q, t) cs[time_map(q, t, Q)].I - - int Q = x->Q; - - // For l = 0 - xs[0].S = 0; - xs[0].B = xt.SB; - - for (int l = 1; l <= L; ++l) - { - extr_state_init(xs, imin((VITFAST_TIME_FRAME - 1), l)); - for (int q = 0; q < Q; ++q) - core_state_init(cs, imin((VITFAST_TIME_FRAME - 1), l), q, Q); - - for (int t = imin((VITFAST_TIME_FRAME - 1), l); t > 0; --t) - { - int code = code_fn(l - t, t, code_arg); - float nil = em.null[code]; - int a = t; - int z = t - 1; - - xs[a].N = fminf(xs[a].N, xs[z].S + xt.SN + nil); - xs[a].N = fminf(xs[a].N, xs[z].N + xt.NN + nil); - - xs[a].B = fminf(xs[a].B, xs[a].S + xt.SB); - xs[a].B = fminf(xs[a].B, xs[a].N + xt.NB); - - xs[a].J = fminf(xs[a].J, xs[z].E + xt.EJ + nil); - xs[a].J = fminf(xs[a].J, xs[z].J + xt.JJ + nil); - - xs[a].C = fminf(xs[a].C, xs[z].E + xt.EC + nil); - xs[a].C = fminf(xs[a].C, xs[z].C + xt.CC + nil); - - pack lastMz = shift(xM(Q - 1, z)); - pack lastDz = shift(xD(Q - 1, z)); - pack lastIz = shift(xI(Q - 1, z)); - pack lastMa = shift(xM(Q - 1, a)); - pack currBz = dup(xs[z].B); - pack accumE = dup(INFINITY); - pack bg = em.background[code]; - for (int q = 0; q < Q; ++q) - { - pack ma = em.match[code * Q + q]; - pack xMa = xM(q, a); - pack xDa = xD(q, a); - pack xIa = xI(q, a); - - xMa = min(xMa, sum(currBz, ct[q].BM, ma)); - xMa = min(xMa, 
sum(lastMz, ct[q].MM, ma)); - xMa = min(xMa, sum(lastIz, ct[q].IM, ma)); - xMa = min(xMa, sum(lastDz, ct[q].DM, ma)); - - lastMz = xM(q, z); - lastDz = xD(q, z); - lastIz = xI(q, z); - - xIa = min(xIa, sum(lastMz, ct[q].MI, bg)); - xIa = min(xIa, sum(lastIz, ct[q].II, bg)); - - xDa = min(xDa, add(lastMa, ct[q].MD)); - - accumE = min(accumE, min(xMa, xDa)); - - lastMa = xMa; - - xM(q, a) = lastMz; - xD(q, a) = lastDz; - xI(q, a) = lastIz; - xM(q, z) = xMa; - xD(q, z) = xDa; - xI(q, z) = xIa; - } - xD(0, z) = min(xD(0, z), add(shift(lastMa), ct[0].MD)); - xs[a].E = hmin(min(accumE, xD(0, z))); - - pack lastD0 = shift(xD(Q - 1, z)); - for (int q = 0; q < Q; ++q) - lastD0 = xD(q, z) = min(xD(q, z), add(lastD0, ct[q].DD)); - - int q = 0; - do - { - lastD0 = shift(lastD0); - for (q = 0; q < Q; ++q) - { - pack x = add(lastD0, ct[q].DD); - if (all_leq(xD(q, z), x)) break; - lastD0 = xD(q, z) = min(xD(q, z), x); - } - } while (q == Q); - - xs[a].B = fminf(xs[a].B, xs[a].E + xt.EB); - xs[a].B = fminf(xs[a].B, xs[a].J + xt.JB); - - xs[a].T = fminf(xs[a].T, xs[a].E + xt.ET); - xs[a].T = fminf(xs[a].T, xs[a].C + xt.CT); - - struct extr_state tmp = xs[z]; - xs[z] = xs[a]; - xs[a] = tmp; - } - } - -#undef xM -#undef xD -#undef xI - - return xs[0].T; -} - -void vitfast_set_extr_trans(struct vitfast *x, enum extr_trans_id id, - float scalar) -{ - switch (id) - { - case EXTR_TRANS_SN: x->extr_trans.SN = scalar; break; - case EXTR_TRANS_NN: x->extr_trans.NN = scalar; break; - case EXTR_TRANS_SB: x->extr_trans.SB = scalar; break; - case EXTR_TRANS_NB: x->extr_trans.NB = scalar; break; - case EXTR_TRANS_EB: x->extr_trans.EB = scalar; break; - case EXTR_TRANS_JB: x->extr_trans.JB = scalar; break; - case EXTR_TRANS_EJ: x->extr_trans.EJ = scalar; break; - case EXTR_TRANS_JJ: x->extr_trans.JJ = scalar; break; - case EXTR_TRANS_EC: x->extr_trans.EC = scalar; break; - case EXTR_TRANS_CC: x->extr_trans.CC = scalar; break; - case EXTR_TRANS_ET: x->extr_trans.ET = scalar; break; - case 
EXTR_TRANS_CT: x->extr_trans.CT = scalar; break; - default: - __builtin_unreachable(); - break; - } -} - -void vitfast_set_core_trans(struct vitfast *x, enum core_trans_id id, - float scalar, int k) -{ - int q = core_pack(k, x->Q); - int e = core_lane(k, x->Q); - - switch (id) - { - case CORE_TRANS_BM: set(&x->core_trans[q].BM, scalar, e); break; - case CORE_TRANS_MM: set(&x->core_trans[q].MM, scalar, e); break; - case CORE_TRANS_MI: set(&x->core_trans[q].MI, scalar, e); break; - case CORE_TRANS_MD: set(&x->core_trans[q].MD, scalar, e); break; - case CORE_TRANS_IM: set(&x->core_trans[q].IM, scalar, e); break; - case CORE_TRANS_II: set(&x->core_trans[q].II, scalar, e); break; - case CORE_TRANS_DM: set(&x->core_trans[q].DM, scalar, e); break; - case CORE_TRANS_DD: set(&x->core_trans[q].DD, scalar, e); break; - default: - __builtin_unreachable(); - break; - } -} - -void vitfast_set_null(struct vitfast *x, float scalar, int code) -{ - x->emission.null[code] = scalar; -} - -void vitfast_set_background(struct vitfast *x, float scalar, int code) -{ - x->emission.background[code] = dup(scalar); -} - -void vitfast_set_match(struct vitfast *x, float scalar, int k, int code) -{ - int q = core_pack(k, x->Q); - int e = core_lane(k, x->Q); - set(&x->emission.match[code * x->Q + q], scalar, e); -} - -int vitfast_num_packs(int K) -{ - int r = (K - 1) / NUM_LANES + 1; - return r < 2 ? 
2 : r; -} - -static inline void extr_trans_init(struct extr_trans *x) -{ - x->SN = INFINITY; - x->NN = INFINITY; - x->SB = INFINITY; - x->NB = INFINITY; - x->EB = INFINITY; - x->JB = INFINITY; - x->EJ = INFINITY; - x->JJ = INFINITY; - x->EC = INFINITY; - x->CC = INFINITY; - x->ET = INFINITY; - x->CT = INFINITY; -} - -static inline void core_trans_init(struct core_trans *x) -{ - x->BM = dup(INFINITY); - x->MM = dup(INFINITY); - x->MI = dup(INFINITY); - x->MD = dup(INFINITY); - x->IM = dup(INFINITY); - x->II = dup(INFINITY); - x->DM = dup(INFINITY); - x->DD = dup(INFINITY); -} - -static inline void extr_state_init(struct extr_state *x, int t) -{ - x[t].S = INFINITY; - x[t].N = INFINITY; - x[t].B = INFINITY; - - x[t].J = INFINITY; - - x[t].E = INFINITY; - x[t].C = INFINITY; - x[t].T = INFINITY; -} - -static inline void core_state_init(struct core_state *x, int t, int q, - int Q) -{ - x[time_map(q, t, Q)].M = dup(INFINITY); - x[time_map(q, t, Q)].D = dup(INFINITY); - x[time_map(q, t, Q)].I = dup(INFINITY); -} - -static inline void emission_init(struct emission *x, int Q) -{ - for (int i = 0; i < VITFAST_TABLE_SIZE; ++i) - { - x->null[i] = INFINITY; - x->background[i] = dup(INFINITY); - } - - for (int i = 0; i < VITFAST_TABLE_SIZE * Q; ++i) - x->match[i] = dup(INFINITY); -} - -INLINE int all_leq(pack a, pack b) -{ -#if __ARM_NEON - uint32x4_t m = vmvnq_u32(vceqq_f32(min(a, b), a)); - uint32x2_t r = vshrn_n_u64(vreinterpretq_u64_u32(m), 16); - return !vget_lane_u64(vreinterpret_u64_u32(r), 0); -#endif - -#if __AVX__ - return 0xFF == _mm256_movemask_ps(_mm256_cmp_ps(a, b, _CMP_LE_OS)); -#endif -} - -INLINE void set(pack *x, float scalar, int e) -{ -#if __ARM_NEON - switch (e) - { - case 0: *x = vsetq_lane_f32(scalar, *x, 0); break; - case 1: *x = vsetq_lane_f32(scalar, *x, 1); break; - case 2: *x = vsetq_lane_f32(scalar, *x, 2); break; - case 3: *x = vsetq_lane_f32(scalar, *x, 3); break; - default: - __builtin_unreachable(); - break; - } -#endif - -#if __AVX__ - switch 
(e) - { - case 0: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 0); break; - case 1: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 1); break; - case 2: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 2); break; - case 3: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 3); break; - case 4: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 4); break; - case 5: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 5); break; - case 6: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 6); break; - case 7: *x = _mm256_blend_ps(*x, _mm256_set1_ps(scalar), 1 << 7); break; - default: - __builtin_unreachable(); - break; - } -#endif -} diff --git a/c-core/vitfast.h b/c-core/vitfast.h deleted file mode 100644 index cc8046c..0000000 --- a/c-core/vitfast.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef VITFAST_H -#define VITFAST_H - -#include "viterbi_ids.h" - -#define VITFAST_TABLE_SIZE 1364 -#define VITFAST_TIME_FRAME 6 - -struct vitfast; - -typedef int (*vitfast_code_fn)(int pos, int len, void *arg); - -struct vitfast *vitfast_new(void); -int vitfast_setup(struct vitfast *, int K); -void vitfast_set_extr_trans(struct vitfast *, enum extr_trans_id, float scalar); -void vitfast_set_core_trans(struct vitfast *, enum core_trans_id, float scalar, int k); -void vitfast_set_null(struct vitfast *, float scalar, int code); -void vitfast_set_background(struct vitfast *, float scalar, int code); -void vitfast_set_match(struct vitfast *, float scalar, int k, int code); -float vitfast_cost(struct vitfast *, int L, vitfast_code_fn, void *code_arg); -int vitfast_num_packs(int K); -void vitfast_del(struct vitfast const *); - -#endif diff --git a/c-core/viterbi_ids.h b/c-core/vith.h similarity index 64% rename from c-core/viterbi_ids.h rename to c-core/vith.h index d02289c..677bff2 100644 --- a/c-core/viterbi_ids.h +++ b/c-core/vith.h @@ -1,8 +1,10 @@ -#ifndef VITERBI_IDS_H -#define VITERBI_IDS_H +#ifndef VITERBI_COMMON_H +#define VITERBI_COMMON_H enum 
extr_trans_id { + EXTR_TRANS_RR, + EXTR_TRANS_SN, EXTR_TRANS_NN, EXTR_TRANS_SB, @@ -29,4 +31,11 @@ enum core_trans_id CORE_TRANS_DD, }; +typedef int (*viterbi_code_fn)(int pos, int len, void *arg); + +#define VITERBI_TABLE_SIZE 1364 +#define VITERBI_TIME_FRAME 6 + +#define VITERBI_DEBUG 0 + #endif diff --git a/c-core/vitref.c b/c-core/vitref.c deleted file mode 100644 index c8e9177..0000000 --- a/c-core/vitref.c +++ /dev/null @@ -1,432 +0,0 @@ -#include "vitref.h" -#include "sample.h" -#include -#include - -#define INLINE static inline __attribute__((always_inline)) - -struct emission -{ - float null[VITREF_TABLE_SIZE]; - float background[VITREF_TABLE_SIZE]; - float *match; -}; - -struct extr_trans -{ - float SN; - float NN; - - float SB; - float NB; - float EB; - float JB; - - float EJ; - float JJ; - - float EC; - float CC; - - float ET; - float CT; -}; - -struct core_trans -{ - float BM; - float MM; - float MI; - float MD; - float IM; - float II; - float DM; - float DD; -}; - -struct extr_state -{ - // Prefix - float S; - float N; - float B; - - // Infix - float J; - - // Suffix - float E; - float C; - float T; -}; - -struct core_state -{ - float M; - float D; - float I; -}; - -struct vitref -{ - int K; - struct extr_state extr_state[VITREF_TIME_FRAME]; - struct core_state *core_state; - - struct emission emission; - - struct extr_trans extr_trans; - struct core_trans *core_trans; -}; - -struct vitref *vitref_new(void) -{ - struct vitref *x = malloc(sizeof(struct vitref)); - if (!x) return x; - - x->K = 0; - x->core_state = NULL; - x->core_trans = NULL; - x->emission.match = NULL; - - return x; -} - -static inline void extr_trans_init(struct extr_trans *); -static inline void core_trans_init(struct core_trans *); -static inline void extr_state_init(struct extr_state *, int t); -static inline void core_state_init(struct core_state *, int t, int k, int K); -static inline void emission_init(struct emission *, int K); - -int vitref_setup(struct vitref *x, int K) -{ - 
x->K = K; - - for (int t = 0; t < VITREF_TIME_FRAME; ++t) - extr_state_init(x->extr_state, t); - - free(x->core_state); - x->core_state = malloc(sizeof(struct core_state[VITREF_TIME_FRAME][K])); - if (!x->core_state) return 1; - - free(x->emission.match); - x->emission.match = malloc(sizeof(float[VITREF_TABLE_SIZE][K])); - if (!x->emission.match) return 1; - - free(x->core_trans); - x->core_trans = malloc(sizeof(struct core_trans[K])); - if (!x->core_trans) return 1; - - extr_trans_init(&x->extr_trans); - for (int k = 0; k < K; ++k) - core_trans_init(&x->core_trans[k]); - - for (int k = 0; k < K; ++k) - { - for (int t = 0; t < VITREF_TIME_FRAME; ++t) - core_state_init(x->core_state, t, k, K); - } - - emission_init(&x->emission, K); - - return 0; -} - -float vitref_get_extr_trans(struct vitref const *x, - enum extr_trans_id id) -{ - switch (id) - { - case EXTR_TRANS_SN: return x->extr_trans.SN; break; - case EXTR_TRANS_NN: return x->extr_trans.NN; break; - case EXTR_TRANS_SB: return x->extr_trans.SB; break; - case EXTR_TRANS_NB: return x->extr_trans.NB; break; - case EXTR_TRANS_EB: return x->extr_trans.EB; break; - case EXTR_TRANS_JB: return x->extr_trans.JB; break; - case EXTR_TRANS_EJ: return x->extr_trans.EJ; break; - case EXTR_TRANS_JJ: return x->extr_trans.JJ; break; - case EXTR_TRANS_EC: return x->extr_trans.EC; break; - case EXTR_TRANS_CC: return x->extr_trans.CC; break; - case EXTR_TRANS_ET: return x->extr_trans.ET; break; - case EXTR_TRANS_CT: return x->extr_trans.CT; break; - default: - __builtin_unreachable(); - break; - } -} - -float vitref_get_core_trans(struct vitref const *x, - enum core_trans_id id, int k) -{ - switch (id) - { - case CORE_TRANS_BM: return x->core_trans[k].BM; break; - case CORE_TRANS_MM: return x->core_trans[k].MM; break; - case CORE_TRANS_MI: return x->core_trans[k].MI; break; - case CORE_TRANS_MD: return x->core_trans[k].MD; break; - case CORE_TRANS_IM: return x->core_trans[k].IM; break; - case CORE_TRANS_II: return 
x->core_trans[k].II; break; - case CORE_TRANS_DM: return x->core_trans[k].DM; break; - case CORE_TRANS_DD: return x->core_trans[k].DD; break; - default: - __builtin_unreachable(); - break; - } -} - -float vitref_get_null(struct vitref const *x, int code) -{ - return x->emission.null[code]; -} - -float vitref_get_background(struct vitref const *x, int code) -{ - return x->emission.background[code]; -} - -float vitref_get_match(struct vitref const *x, int k, int code) -{ - return x->emission.match[code * x->K + k]; -} - -INLINE int time_map(int k, int t, int K) { return t * K + k; } -INLINE int imin(int a, int b) { return a < b ? a : b; } -INLINE float min(float a, float b) { return fminf(a, b); } - -static inline void core_advance(struct core_state *, int K); -static inline void extr_advance(struct extr_state *); - -float vitref_cost(struct vitref *x, int L, vitref_code_fn code_fn, - void *code_arg) -{ - struct emission const em = x->emission; - - struct extr_trans const xt = x->extr_trans; - struct core_trans const *ct = x->core_trans; - - struct extr_state *xs = x->extr_state; - struct core_state *cs = x->core_state; - -#define xM(k, t) cs[time_map(k, t, K)].M -#define xD(k, t) cs[time_map(k, t, K)].D -#define xI(k, t) cs[time_map(k, t, K)].I - - int K = x->K; - - // For l = 0 - xs[1].S = 0; - xs[1].B = xt.SB; - - for (int l = 1; l <= L; ++l) - { - for (int t = imin((VITREF_TIME_FRAME - 1), l); t > 0; --t) - { - int code = code_fn(l - t, t, code_arg); - float nil = em.null[code]; - - xs[0].N = min(xs[0].N, xs[t].S + xt.SN + nil); - xs[0].N = min(xs[0].N, xs[t].N + xt.NN + nil); - - xs[0].B = min(xs[0].B, xs[0].S + xt.SB); - xs[0].B = min(xs[0].B, xs[0].N + xt.NB); - - xs[0].J = min(xs[0].J, xs[t].E + xt.EJ + nil); - xs[0].J = min(xs[0].J, xs[t].J + xt.JJ + nil); - - xs[0].C = min(xs[0].C, xs[t].E + xt.EC + nil); - xs[0].C = min(xs[0].C, xs[t].C + xt.CC + nil); - - float lastMt = INFINITY; - float lastDt = INFINITY; - float lastIt = INFINITY; - float lastM0 = 
INFINITY; - float lastD0 = INFINITY; - float accumE = INFINITY; - float bg = em.background[code]; - for (int k = 0; k < K; ++k) - { - float ma = em.match[code * K + k]; - - xM(k, 0) = min(xM(k, 0), xs[t].B + ct[k].BM + ma); - xM(k, 0) = min(xM(k, 0), lastMt + ct[k].MM + ma); - xM(k, 0) = min(xM(k, 0), lastIt + ct[k].IM + ma); - xM(k, 0) = min(xM(k, 0), lastDt + ct[k].DM + ma); - - xI(k, 0) = min(xI(k, 0), xM(k, t) + ct[k].MI + bg); - xI(k, 0) = min(xI(k, 0), xI(k, t) + ct[k].II + bg); - - xD(k, 0) = min(xD(k, 0), lastM0 + ct[k].MD); - xD(k, 0) = min(xD(k, 0), lastD0 + ct[k].DD); - - accumE = min(accumE, xM(k, 0)); - accumE = min(accumE, xD(k, 0)); - - lastMt = xM(k, t); - lastDt = xD(k, t); - lastIt = xI(k, t); - lastM0 = xM(k, 0); - lastD0 = xD(k, 0); - } - xs[0].E = accumE; - - xs[0].B = min(xs[0].B, xs[0].E + xt.EB); - xs[0].B = min(xs[0].B, xs[0].J + xt.JB); - - xs[0].T = min(xs[0].T, xs[0].E + xt.ET); - xs[0].T = min(xs[0].T, xs[0].C + xt.CT); - } - core_advance(cs, K); - extr_advance(xs); - } - -#undef xM -#undef xD -#undef xI - - return xs[1].T; -} - -void vitref_del(struct vitref const *x) -{ - if (x) - { - free(x->core_state); - free(x->core_trans); - free(x->emission.match); - free((void *)x); - } -} - -void vitref_sample(struct vitref *x, int seed) -{ - srand(seed); - x->extr_trans.SN = sample_float(); - x->extr_trans.NN = sample_float(); - - x->extr_trans.SB = sample_float(); - x->extr_trans.NB = sample_float(); - x->extr_trans.EB = sample_float(); - x->extr_trans.JB = sample_float(); - - x->extr_trans.EJ = sample_float(); - x->extr_trans.JJ = sample_float(); - - x->extr_trans.EC = sample_float(); - x->extr_trans.CC = sample_float(); - - x->extr_trans.ET = sample_float(); - x->extr_trans.CT = sample_float(); - - for (int k = 0; k < x->K; ++k) - { - x->core_trans[k].BM = sample_float(); - x->core_trans[k].MM = sample_float(); - x->core_trans[k].MI = sample_float(); - x->core_trans[k].MD = sample_float(); - x->core_trans[k].IM = sample_float(); - 
x->core_trans[k].II = sample_float(); - x->core_trans[k].DM = sample_float(); - x->core_trans[k].DD = sample_float(); - } - - for (int i = 0; i < VITREF_TABLE_SIZE; ++i) - x->emission.null[i] = sample_float(); - - for (int i = 0; i < VITREF_TABLE_SIZE; ++i) - x->emission.background[i] = sample_float(); - - for (int i = 0; i < VITREF_TABLE_SIZE * x->K; ++i) - x->emission.match[i] = sample_float(); -} - -static inline void extr_trans_init(struct extr_trans *x) -{ - x->SN = INFINITY; - x->NN = INFINITY; - x->SB = INFINITY; - x->NB = INFINITY; - x->EB = INFINITY; - x->JB = INFINITY; - x->EJ = INFINITY; - x->JJ = INFINITY; - x->EC = INFINITY; - x->CC = INFINITY; - x->ET = INFINITY; - x->CT = INFINITY; -} - -static inline void core_trans_init(struct core_trans *x) -{ - x->BM = INFINITY; - x->MM = INFINITY; - x->MI = INFINITY; - x->MD = INFINITY; - x->IM = INFINITY; - x->II = INFINITY; - x->DM = INFINITY; - x->DD = INFINITY; -} - -static inline void extr_state_init(struct extr_state *x, int t) -{ - x[t].S = INFINITY; - x[t].N = INFINITY; - x[t].B = INFINITY; - - x[t].J = INFINITY; - - x[t].E = INFINITY; - x[t].C = INFINITY; - x[t].T = INFINITY; -} - -static inline void core_state_init(struct core_state *x, int t, int k, - int K) -{ - x[time_map(k, t, K)].M = INFINITY; - x[time_map(k, t, K)].D = INFINITY; - x[time_map(k, t, K)].I = INFINITY; -} - -static inline void emission_init(struct emission *x, int K) -{ - for (int i = 0; i < VITREF_TABLE_SIZE; ++i) - { - x->null[i] = INFINITY; - x->background[i] = INFINITY; - } - - for (int i = 0; i < VITREF_TABLE_SIZE * K; ++i) - x->match[i] = INFINITY; -} - -static inline void core_advance(struct core_state *x, int K) -{ - for (int k = 0; k < K; ++k) - { - for (int t = VITREF_TIME_FRAME - 1; t > 0; --t) - { - x[time_map(k, t, K)].M = x[time_map(k, t - 1, K)].M; - x[time_map(k, t, K)].D = x[time_map(k, t - 1, K)].D; - x[time_map(k, t, K)].I = x[time_map(k, t - 1, K)].I; - } - core_state_init(x, 0, k, K); - } -} - -static inline void 
extr_advance(struct extr_state *x) -{ - for (int t = VITREF_TIME_FRAME - 1; t > 0; --t) - { - x[t].S = x[t - 1].S; - x[t].N = x[t - 1].N; - x[t].B = x[t - 1].B; - - x[t].J = x[t - 1].J; - - x[t].E = x[t - 1].E; - x[t].C = x[t - 1].C; - x[t].T = x[t - 1].T; - } - extr_state_init(x, 0); -} diff --git a/c-core/vitref.h b/c-core/vitref.h deleted file mode 100644 index 73aa5b5..0000000 --- a/c-core/vitref.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifndef VITREF_H -#define VITREF_H - -#include "viterbi_ids.h" - -#define VITREF_TABLE_SIZE 1364 -#define VITREF_TIME_FRAME 6 - -struct vitref; - -typedef int (*vitref_code_fn)(int pos, int len, void *arg); - -struct vitref *vitref_new(void); -int vitref_setup(struct vitref *, int K); -float vitref_get_extr_trans(struct vitref const *, enum extr_trans_id); -float vitref_get_core_trans(struct vitref const *, enum core_trans_id, int k); -float vitref_get_null(struct vitref const *, int code); -float vitref_get_background(struct vitref const *x, int code); -float vitref_get_match(struct vitref const *x, int k, int code); -float vitref_cost(struct vitref *, int L, vitref_code_fn, void *code_arg); -void vitref_del(struct vitref const *); -void vitref_sample(struct vitref *, int seed); - -#endif