-
Notifications
You must be signed in to change notification settings - Fork 0
/
scorestack.c
350 lines (313 loc) · 10.8 KB
/
scorestack.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
/************************************************************
* HMMER - Biological sequence analysis with HMMs
* Copyright 1992-1995 Sean R. Eddy
*
* This source code is distributed under the terms of the
* GNU General Public License. See the files COPYING and
* GNULICENSE for details.
*
************************************************************/
/* scorestack.c
* SRE, Tue Aug 17 09:50:39 1993
*
* For unidirectional scanning search procedures, implement
* a score reporting system that filters out hits that overlap
* with higher-scoring printed scores.
*
* The simplest rule to use would be to keep a record of the last hit;
* on receiving a new hit: 1) if new hit overlaps with last hit and
* new hit is higher, replace last with new; 2) if new hit overlaps
* with last hit and new hit is lower, ignore new; 3) if new hit
* does not overlap with last, report last and assign new to last.
* At end, report last. This is essentially the rule used by the
* original hmm and cove scanning procedures.
*
* There is a small weakness in this strategy, in that for three
* hits A > B > C which all overlap, only A will be reported; but
* although A overlaps B and B overlaps C, A may not overlap C.
* (Will this ever happen in reality? I dunno. I don't want to
* be surprised.)
*
* Thus, this more complicated strategy.
* Keep a stack of last hits.
* On receiving a new hit:
* 1) if new overlaps last and new > last, push new onto stack
* 2) if new overlaps last and new <= last, ignore new
* 3) if new doesn't overlap, resolve stack; start new stack and
* push new onto it.
* At end: resolve stack.
*
* Stack resolution:
* set "previously reported hit" to -1,-1 so it won't overlap w/ anything
* while something is in the stack:
* pop top hit off stack
* if it overlaps with previously reported hit, continue;
* if it doesn't overlap, report it
*
* Testing overlap:
* Given two subsequences with endpoints al,ar and bl, br,
* with no other knowledge, we would need to test whether any of
* the four endpoints are within the opposing subsequence.
* However, because we're scanning unidirectionally, we know
* that the new right end is greater than the old right end,
* so we only need to test whether the old right end >= new left
* end.
*
* External function:
*
* ReportScanHit() - report a hit
* or, if reported coords are -1,-1, resolve old
* stack, cleanup and exit.
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <limits.h>
#include "squid.h"
#ifdef MEMDEBUG
#include "dbmalloc.h"
#endif
/* Data structure for the stack of previous hits;
* declarations of the functions to manipulate it.
*/
struct hitstack_s {
int left; /* left coord of matched segment */
int right; /* right coord of matched segment */
double score; /* score of match */
struct hitstack_s *nxt; /* pointer to next elem in stack */
};
static struct hitstack_s *init_hitstack(void);
static void push_hitstack(struct hitstack_s *hstack,int left,int right, double score);
static int pop_hitstack(struct hitstack_s *hstack, int *ret_left, int *ret_right, double *ret_score);
static void free_hitstack(struct hitstack_s *hstack);
/* Function: ReportScanHit()
*
* Purpose: Caller reports a hit during a search scan, and
* provides a pointer to a function we can call to
* print non-overlapping hits. Caller reports
* -1,-1 for coords to request cleanup and end.
*
* Two special cases must be dealt with:
* INIT: If the hit stack hasn't been started yet,
* we need to initialize it before doing
* anything else
* END: If coords are -1,-1, we resolve the stack
* and cleanup; caller is finished with us
* for now.
*
* Args: left - left coord of hit segment
* right - right coord of hit segment
* score - score of the hit
* print_hit - pointer to a function to print
* nonoverlapping hits
*
* Return: 1 on success, 0 on failure.
*/
int
ReportScanHit(int left,
int right,
double score,
int (*print_hit)(int,int,double))
{
static struct hitstack_s *hstack = NULL; /* static local handle; set to NULL on 1st entry */
static int oldright = -1; /* -1 is guaranteed not to overlap w/ first report */
int oldleft;
int newleft, newright;
double newscore;
/* Check whether this is first entry;
* init hit stack if so.
*/
if (hstack == NULL) hstack = init_hitstack();
/* Check whether we have to resolve the old stack:
* if caller is reporting it's done (-1,-1 coords),
* or if new hit doesn't overlap last stacked hit.
*/
if (left > oldright || (left == -1 && right == -1))
{
/* Stack resolution.
*/
oldleft = INT_MAX;
while (pop_hitstack(hstack, &newleft, &newright, &newscore))
{
/* does this hit not overlap w/ previous printed one? */
if (newright < oldleft)
{
(*print_hit)(newleft, newright, newscore);
oldleft = newleft;
}
}
free_hitstack(hstack);
hstack = NULL;
oldright = -1;
/* Start new stack, if not done.
*/
if (left != -1 || right != -1)
{
hstack = init_hitstack();
push_hitstack(hstack, left, right, score);
oldright = right;
}
}
/* else, they overlap; if new reported score is better than last one,
* push new one. We're guaranteed to have something in
* the stack, so we can use the score in hstack->nxt->score.
* Reset oldright to be the new right edge of the stack, if we add something.
*/
else if (score > hstack->nxt->score)
{
push_hitstack(hstack, left, right, score);
oldright = right;
}
/* else, they overlap and the newly reported score
* isn't better, so we ignore it.
*/
return 1;
}
/* new_scorestack.c
* SRE, Thu Mar 24 16:00:10 1994
*
* This is the third version of my code for filtering out
* overlapping hits -- and the simplest version yet.
*
* All it does is keep the last best score and its start/end
* positions. A new score and start/end positions are compared
* to this.
*
* 1) if the new start position is different from the old,
* the new hit doesn't conflict; print the old hit
* and save the new one.
*
* 2) if the new start position is the same as the old start,
* the two hits are in conflict and need filtering.
*
* - if the new score is higher, store the new hit.
* - if the old score was higher, ignore the new hit.
*
* External function:
*
* Simple_ReportScanHit() - report a hit
* or, if reported coords are -1,-1, resolve old
* stack, cleanup and exit.
*
*/
/* Function: Simple_ReportScanHit()
*
* Purpose: Caller reports a hit during a search scan, and
* provides a pointer to a function we can call to
* print non-overlapping hits. Caller reports
* -1,-1 for coords to request cleanup and end.
*
* Two special cases must be dealt with:
* INIT: If the hit stack hasn't been started yet,
* we need to initialize it before doing
* anything else
* END: If coords are -1,-1, we resolve the stack
* and cleanup; caller is finished with us
* for now.
*
* Args: left - left coord of hit segment
* right - right coord of hit segment
* score - score of the hit
* print_hit - pointer to a function to print
* nonoverlapping hits
*
* Return: 1 on success, 0 on failure.
*/
int
Simple_ReportScanHit(int left,
int right,
double score,
int (*print_hit)(int,int,double))
{
static int oldleft = -1;
static int oldright = -1;
static double oldscore = -999999.0;
/* Check whether we have to resolve the old stack:
* if caller is reporting it's done (-1,-1 coords),
* or if new hit doesn't overlap last stacked hit.
*/
if (left != oldleft || (left == -1 && right == -1))
{
/* Print the score we were storing */
if (oldright != -1 && oldleft != -1)
(*print_hit)(oldleft, oldright, oldscore);
/* Store the score we just saw */
oldright = right;
oldleft = left;
oldscore = score;
}
/* else, they overlap with the same left (start) point;
* if new reported score is better than last one, keep new one.
*/
else if (score > oldscore)
{
oldright = right;
oldscore = score;
}
/* else, they overlap with the same left (start) point
* and the newly reported score isn't better, so we ignore it.
*/
return 1;
}
/* Functions: init_hitstack()
* push_hitstack()
* pop_hitstack()
* free_hitstack()
*
* Purpose: Implementation of the pushdown stack for
* keeping old hit positions and scores.
*
* The hitstack has a dummy begin element,
* so the first legitimate element is
* hstack->nxt. The last legitimate element
* has a NULL nxt pointer.
*/
static struct hitstack_s *
init_hitstack(void)
{
struct hitstack_s *hstack;
if ((hstack = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
hstack->nxt = NULL;
return hstack;
}
static void
push_hitstack(struct hitstack_s *hstack,
int left,
int right,
double score)
{
struct hitstack_s *new;
if ((new = (struct hitstack_s *) malloc (sizeof(struct hitstack_s))) == NULL)
Die("Memory allocation failure at %s line %d", __FILE__, __LINE__);
new->left = left;
new->right = right;
new->score = score;
new->nxt = hstack->nxt;
hstack->nxt = new;
}
static int
pop_hitstack(struct hitstack_s *hstack,
int *ret_left,
int *ret_right,
double *ret_score)
{
struct hitstack_s *old;
if (hstack->nxt == NULL) return 0;
old = hstack->nxt;
hstack->nxt = old->nxt;
*ret_left = old->left;
*ret_right = old->right;
*ret_score = old->score;
free(old);
return 1;
}
static void
free_hitstack(struct hitstack_s *hstack)
{
int left, right;
double score;
while (pop_hitstack(hstack, &left, &right, &score) != 0)
; /* do nothing */
free(hstack);
}