-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathyak.h
142 lines (113 loc) · 3.81 KB
/
yak.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#ifndef YAK_H
#define YAK_H
// Version string for this build of the code.
#define YAKS_VERSION "0.1-r58-dirty"
#include <stdint.h>
#include <map>
#include <set>
#include <vector>
#define YAK_MAX_KMER 31

// Width of a per-k-mer counter field and the constants derived from it.
#define YAK_COUNTER_BITS 18

// Layout of a 48-bit packed position word. The masks below are 64-bit
// (unsigned long long) constants: building them from a plain `int` literal
// would shift a 32-bit value by 32..48 bits, which is undefined behaviour.
//   bit  47      : YAK_POS_REPEAT_MASK
//   bits 32..46  : YAK_POS_ID_MASK
//   bits  0..31  : YAK_POS_POS_MASK
#define YAK_POS_ID_BITS 48
#define YAK_POS_BITS 32
#define YAK_POS_REPEAT_MASK (1ULL<<(YAK_POS_ID_BITS-1))
#define YAK_POS_POS_MASK ((1ULL<<(YAK_POS_BITS))-1)
#define YAK_POS_ID_MASK ((1ULL<<(YAK_POS_ID_BITS))-1-YAK_POS_REPEAT_MASK-YAK_POS_POS_MASK)

// Counter-derived constants (all fit in an int for YAK_COUNTER_BITS == 18).
//   YAK_N_COUNTS     : number of distinct counter values (2^YAK_COUNTER_BITS)
//   YAK_MAX_COUNT    : largest counter value (all counter bits set)
//   YAK_REPEAT_MASK  : top bit of the counter field
//   YAK_FORWARD_MASK : second-highest bit of the counter field
//   YAK_KEY_MASK     : the remaining low (YAK_COUNTER_BITS-2) bits
#define YAK_N_COUNTS (1<<YAK_COUNTER_BITS)
#define YAK_MAX_COUNT ((1<<YAK_COUNTER_BITS)-1)
#define YAK_REPEAT_MASK (1<<(YAK_COUNTER_BITS-1))
#define YAK_FORWARD_MASK (1<<(YAK_COUNTER_BITS-2))
#define YAK_KEY_MASK ((1<<YAK_COUNTER_BITS)- 1 - YAK_REPEAT_MASK - YAK_FORWARD_MASK)

// Low 30 bits set.
#define YAK_POS_MASK ((1<<30)- 1)

// Block size as a bit count: 2^9 = 512 bits = 64 bytes, the size of a cache line.
#define YAK_BLK_SHIFT 9
#define YAK_BLK_MASK ((1<<(YAK_BLK_SHIFT)) - 1)

// Values for the `mode` argument of yak_ch_restore_core().
#define YAK_LOAD_ALL 1
#define YAK_LOAD_TRIOBIN1 2
#define YAK_LOAD_TRIOBIN2 3

// Magic bytes identifying a dump file.
#define YAK_MAGIC "YAK\2"
// Counting options. A pointer to this struct is taken by yak_copt_init()
// and, as const, by the yak_count*() entry points.
// NOTE(review): field meanings below are inferred from names — confirm
// against the implementation.
typedef struct {
    int32_t bf_shift;    // presumably the Bloom-filter size exponent
    int32_t bf_n_hash;   // presumably the number of Bloom-filter hashes
    int32_t k;           // presumably the k-mer length
    int32_t pre;         // presumably the prefix length in bits
    int32_t n_thread;    // presumably the worker thread count
    int64_t chunk_size;  // presumably the input chunk size per batch
} yak_copt_t;
// Options for the QV entry points: a pointer to this struct is taken by
// yak_qopt_init() and, as const, by yak_qv().
// NOTE(review): field meanings are inferred from names — confirm against
// the implementation.
typedef struct {
int32_t print_each; // presumably a flag enabling per-record output
int32_t min_len; // presumably a minimum length threshold
int32_t n_threads; // note the spelling: yak_copt_t names its counterpart n_thread
double min_frac; // presumably a minimum fraction threshold
double fpr; // presumably a false-positive rate
int64_t chunk_size;
} yak_qopt_t;
// Statistics record passed by non-const pointer to yak_qv_solve()
// (presumably filled in as its output — confirm against the implementation).
// With YAK_COUNTER_BITS == 18 the adj_cnt array alone holds 2^18 doubles,
// so take care when placing an instance of this struct on the stack.
typedef struct {
    int64_t tot;
    double qv_raw;
    double qv;
    double cov;
    double err;
    double fpr_lower;
    double fpr_upper;
    double adj_cnt[1<<YAK_COUNTER_BITS]; // one slot per possible counter value
} yak_qstat_t;
// State handled by the yak_bf_*() functions: two integer parameters
// (mirroring the arguments of yak_bf_init()) and a byte buffer.
// NOTE(review): "bf" presumably stands for Bloom filter — confirm.
typedef struct {
    int n_shift;
    int n_hashes;
    uint8_t *b;   // byte storage; ownership is not visible from this header
} yak_bf_t;
// Opaque hash-table type: only declared in this header, defined elsewhere.
struct yak_ht_t;
// Pairs a hash-table pointer with a yak_bf_t pointer. yak_ch_t refers to
// this type through its `h` field.
typedef struct {
struct yak_ht_t *h;
yak_bf_t *b;
} yak_ch1_t;
// Top-level table handle used by the yak_ch_*() functions. The four
// integer fields mirror the arguments of yak_ch_init(k, pre, n_hash, n_shift).
typedef struct {
    int k;
    int pre;
    int n_hash;
    int n_shift;
    uint64_t tot;   // NOTE(review): presumably a running total of stored entries — confirm
    yak_ch1_t *h;   // NOTE(review): presumably an array of sub-tables — confirm its length
} yak_ch_t;
// A collection of yak_ch_t pointers with matching name strings; this is the
// type accepted and returned by yak_count_multi().
typedef struct{
int n_seq; // NOTE(review): presumably the number of entries in names and yak_ch — confirm
char** names;
yak_ch_t** yak_ch;
} yak_ch_arr_t;
// Single entry of a recordset_t: two flags plus a 16-bit record value.
// NOTE(review): flag semantics are inferred from names — confirm.
typedef struct{
bool repeated; // presumably set when the entry was seen more than once
bool init; // presumably set once the entry has been populated
uint16_t record;
} recordset1_t;
// Container of recordset1_t entries; passed by pointer to
// yak_ch_insert_list_kmer_record_mapping().
typedef struct{
uint32_t current_length; // NOTE(review): presumably the number of entries in recordsets — confirm
recordset1_t *recordsets;
} recordset_t;
// Single entry of a recordset_ps_t: two flags, a 16-bit id and a 64-bit
// position value.
// NOTE(review): flag semantics are inferred from names — confirm.
typedef struct{
bool repeated; // presumably set when the entry was seen more than once
bool init; // presumably set once the entry has been populated
uint16_t uni_id;
uint64_t pos;
} record_ps_t;
// Container of record_ps_t entries.
typedef struct{
uint32_t current_length; // NOTE(review): presumably the number of entries in records — confirm
record_ps_t *records;
} recordset_ps_t;
// Globals defined in another translation unit.
// NOTE(review): presumably a verbosity level for diagnostics — confirm.
extern int yak_verbose;
// 256-entry lookup table, one entry per possible byte value.
// NOTE(review): presumably maps nucleotide characters to small integer codes — confirm.
extern unsigned char seq_nt4_table[256];
// Takes a yak_copt_t by non-const pointer (presumably to fill in default
// option values — confirm against the implementation).
void yak_copt_init(yak_copt_t *opt);
// yak_bf_t lifecycle and insertion. Return-value conventions and ownership
// are not visible from this header.
// NOTE(review): presumably yak_bf_init() allocates and yak_bf_destroy()
// frees the object — confirm.
yak_bf_t *yak_bf_init(int n_shift, int n_hashes);
void yak_bf_destroy(yak_bf_t *b);
int yak_bf_insert(yak_bf_t *b, uint64_t hash);
// yak_ch_t lifecycle. NOTE(review): presumably yak_ch_init() allocates and
// yak_ch_destroy() frees the table, while yak_ch_destroy_bf() releases only
// the yak_bf_t parts — confirm against the implementation.
yak_ch_t *yak_ch_init(int k, int pre, int n_hash, int n_shift);
void yak_ch_destroy(yak_ch_t *h);
void yak_ch_destroy_bf(yak_ch_t *h);
// Batch insertion of `n` 64-bit values from `a`. The *_kmer_record_mapping
// variant takes std::set and bool arguments, so this header must be
// compiled as C++. Return-value meaning is not visible here.
int yak_ch_insert_list(yak_ch_t *h, int create_new, int n, const uint64_t *a);
int yak_ch_insert_list_kmer_record_mapping(yak_ch_t *h, int create_new, int n, const uint64_t *a, const bool *f, recordset_t* recordset, const uint16_t *r, long i, std::set<uint64_t>* deleted);
int yak_ch_insert_list_kmer_pos(yak_ch_t *h, yak_ch_t *h_pos, int create_new, int n, const uint64_t *a, const uint16_t *r, const uint32_t*pos, long i);
// Lookups on a const table for the 64-bit value `x`. yak_ch_get_pos()
// additionally takes a second table and a non-const `pos` pointer
// (presumably an output — confirm).
int yak_ch_get(const yak_ch_t *h, uint64_t x);
int yak_ch_get_k(const yak_ch_t *h, uint64_t x);
uint16_t yak_ch_get_pos(const yak_ch_t *h, const yak_ch_t *h_pos, uint64_t x, uint32_t *pos);
// Whole-table operations taking a thread count. yak_ch_hist() takes a
// count array declared with YAK_N_COUNTS elements.
void yak_ch_clear(yak_ch_t *h, int n_thread);
void yak_ch_hist(const yak_ch_t *h, int64_t cnt[YAK_N_COUNTS], int n_thread);
void yak_ch_shrink(yak_ch_t *h, int min, int max, int n_thread);
// Persistence to/from the file named `fn`. yak_ch_restore_core() is
// variadic; `mode` presumably takes one of the YAK_LOAD_* constants, and
// the extra arguments it expects are not documented in this header — confirm.
int yak_ch_dump(const yak_ch_t *h, const char *fn);
yak_ch_t *yak_ch_restore(const char *fn);
yak_ch_t *yak_ch_restore_core(yak_ch_t *ch0, const char *fn, int mode, ...);
// Counting entry points: each takes an input file name, const counting
// options and an existing table (or table array) `h0`, and returns a table
// of the same kind.
// NOTE(review): whether `h0` may be NULL and whether the returned pointer
// aliases `h0` is not visible from this header — confirm.
yak_ch_t *yak_count(const char *fn, const yak_copt_t *opt, yak_ch_t *h0);
yak_ch_t *yak_count_create_new(const char *fn, const yak_copt_t *opt, yak_ch_t *h0);
yak_ch_arr_t* yak_count_multi(const char *fn, const yak_copt_t *opt, yak_ch_arr_t *h0);
// Takes a yak_qopt_t by non-const pointer (presumably to fill in default
// option values — confirm against the implementation).
void yak_qopt_init(yak_qopt_t *opt);
// Takes const options, an input file name, a const table and a non-const
// count array `cnt` (presumably filled as output; required length is not
// stated here — confirm).
void yak_qv(const yak_qopt_t *opt, const char *fn, const yak_ch_t *ch, int64_t *cnt);
// Takes two const count arrays, a k-mer size and a rate `fpr`, plus a
// non-const yak_qstat_t pointer (presumably the output). Return-value
// meaning is not visible here.
int yak_qv_solve(const int64_t *hist, const int64_t *cnt, int kmer, double fpr, yak_qstat_t *qs);
#endif