|
1 | 1 | #include "cache.h"
|
2 | 2 | #include "sha1-lookup.h"
|
3 | 3 |
|
| 4 | +static uint32_t take2(const unsigned char *sha1) |
| 5 | +{ |
| 6 | + return ((sha1[0] << 8) | sha1[1]); |
| 7 | +} |
| 8 | + |
| 9 | +/* |
| 10 | + * Conventional binary search loop looks like this: |
| 11 | + * |
| 12 | + * do { |
| 13 | + * int mi = (lo + hi) / 2; |
| 14 | + * int cmp = "entry pointed at by mi" minus "target"; |
| 15 | + * if (!cmp) |
| 16 | + * return (mi is the wanted one) |
| 17 | + * if (cmp > 0) |
| 18 | + * hi = mi; "mi is larger than target" |
| 19 | + * else |
| 20 | + * lo = mi+1; "mi is smaller than target" |
| 21 | + * } while (lo < hi); |
| 22 | + * |
| 23 | + * The invariants are: |
| 24 | + * |
| 25 | + * - When entering the loop, lo points at a slot that is never |
| 26 | + * above the target (it could be at the target), hi points at a |
| 27 | + * slot that is guaranteed to be above the target (it can never |
| 28 | + * be at the target). |
| 29 | + * |
| 30 | + * - We find a point 'mi' between lo and hi (mi could be the same |
| 31 | + * as lo, but never can be the same as hi), and check if it hits |
| 32 | + * the target. There are three cases: |
| 33 | + * |
| 34 | + * - if it is a hit, we are happy. |
| 35 | + * |
| 36 | + * - if it is strictly higher than the target, we update hi with |
| 37 | + * it. |
| 38 | + * |
| 39 | + * - if it is strictly lower than the target, we update lo to be |
| 40 | + * one slot after it, because we allow lo to be at the target. |
| 41 | + * |
| 42 | + * When choosing 'mi', we do not have to take the "middle" but |
| 43 | + * anywhere in between lo and hi, as long as lo <= mi < hi is |
| 44 | + * satisfied. When we somehow know that the distance between the |
| 45 | + * target and lo is much shorter than the target and hi, we could |
| 46 | + * pick mi that is much closer to lo than the midway. |
| 47 | + */ |
| 48 | +/* |
| 49 | + * The table should contain "nr" elements. |
| 50 | + * The sha1 of element i (between 0 and nr - 1) should be returned |
| 51 | + * by "fn(i, table)". |
| 52 | + */ |
| 53 | +int sha1_pos(const unsigned char *sha1, void *table, size_t nr, |
| 54 | + sha1_access_fn fn) |
| 55 | +{ |
| 56 | + size_t hi = nr; |
| 57 | + size_t lo = 0; |
| 58 | + size_t mi = 0; |
| 59 | + |
| 60 | + if (!nr) |
| 61 | + return -1; |
| 62 | + |
| 63 | + if (nr != 1) { |
| 64 | + size_t lov, hiv, miv, ofs; |
| 65 | + |
| 66 | + for (ofs = 0; ofs < 18; ofs += 2) { |
| 67 | + lov = take2(fn(0, table) + ofs); |
| 68 | + hiv = take2(fn(nr - 1, table) + ofs); |
| 69 | + miv = take2(sha1 + ofs); |
| 70 | + if (miv < lov) |
| 71 | + return -1; |
| 72 | + if (hiv < miv) |
| 73 | + return -1 - nr; |
| 74 | + if (lov != hiv) { |
| 75 | + /* |
| 76 | + * At this point miv could be equal |
| 77 | + * to hiv (but sha1 could still be higher); |
| 78 | + * the invariant of (mi < hi) should be |
| 79 | + * kept. |
| 80 | + */ |
| 81 | + mi = (nr - 1) * (miv - lov) / (hiv - lov); |
| 82 | + if (lo <= mi && mi < hi) |
| 83 | + break; |
| 84 | + die("oops"); |
| 85 | + } |
| 86 | + } |
| 87 | + if (18 <= ofs) |
| 88 | + die("cannot happen -- lo and hi are identical"); |
| 89 | + } |
| 90 | + |
| 91 | + do { |
| 92 | + int cmp; |
| 93 | + cmp = hashcmp(fn(mi, table), sha1); |
| 94 | + if (!cmp) |
| 95 | + return mi; |
| 96 | + if (cmp > 0) |
| 97 | + hi = mi; |
| 98 | + else |
| 99 | + lo = mi + 1; |
| 100 | + mi = (hi + lo) / 2; |
| 101 | + } while (lo < hi); |
| 102 | + return -lo-1; |
| 103 | +} |
| 104 | + |
4 | 105 | /*
|
5 | 106 | * Conventional binary search loop looks like this:
|
6 | 107 | *
|
|
0 commit comments