Skip to content

Commit 96beef8

Browse files
chriscoolgitster
authored and committed
sha1-lookup: add new "sha1_pos" function to efficiently lookup sha1
This function has been copied from the "patch_pos" function in "patch-ids.c", but an additional parameter has been added. The new parameter is a function pointer that is used to access the sha1 of an element in the table. Signed-off-by: Christian Couder <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent e96f368 commit 96beef8

File tree

2 files changed

+108
-0
lines changed

2 files changed

+108
-0
lines changed

sha1-lookup.c

+101
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,107 @@
11
#include "cache.h"
22
#include "sha1-lookup.h"
33

4+
/*
 * Combine the two bytes at sha1[0] and sha1[1] into one number,
 * with sha1[0] as the most significant byte (big-endian order).
 */
static uint32_t take2(const unsigned char *sha1)
{
	uint32_t high = sha1[0];
	uint32_t low = sha1[1];

	return (high << 8) | low;
}
8+
9+
/*
10+
* Conventional binary search loop looks like this:
11+
*
12+
* do {
13+
* int mi = (lo + hi) / 2;
14+
* int cmp = "entry pointed at by mi" minus "target";
15+
* if (!cmp)
16+
* return (mi is the wanted one)
17+
* if (cmp > 0)
18+
* hi = mi; "mi is larger than target"
19+
* else
20+
* lo = mi+1; "mi is smaller than target"
21+
* } while (lo < hi);
22+
*
23+
* The invariants are:
24+
*
25+
* - When entering the loop, lo points at a slot that is never
26+
* above the target (it could be at the target), hi points at a
27+
* slot that is guaranteed to be above the target (it can never
28+
* be at the target).
29+
*
30+
* - We find a point 'mi' between lo and hi (mi could be the same
31+
* as lo, but never can be the same as hi), and check if it hits
32+
* the target. There are three cases:
33+
*
34+
* - if it is a hit, we are happy.
35+
*
36+
* - if it is strictly higher than the target, we update hi with
37+
* it.
38+
*
39+
* - if it is strictly lower than the target, we update lo to be
40+
* one slot after it, because we allow lo to be at the target.
41+
*
42+
* When choosing 'mi', we do not have to take the "middle" but
43+
* anywhere in between lo and hi, as long as lo <= mi < hi is
44+
* satisfied. When we somehow know that the distance between the
45+
* target and lo is much shorter than the target and hi, we could
46+
* pick mi that is much closer to lo than the midway.
47+
*/
48+
/*
49+
* The table should contain "nr" elements.
50+
* The sha1 of element i (between 0 and nr - 1) should be returned
51+
* by "fn(i, table)".
52+
*/
53+
int sha1_pos(const unsigned char *sha1, void *table, size_t nr,
54+
sha1_access_fn fn)
55+
{
56+
size_t hi = nr;
57+
size_t lo = 0;
58+
size_t mi = 0;
59+
60+
if (!nr)
61+
return -1;
62+
63+
if (nr != 1) {
64+
size_t lov, hiv, miv, ofs;
65+
66+
for (ofs = 0; ofs < 18; ofs += 2) {
67+
lov = take2(fn(0, table) + ofs);
68+
hiv = take2(fn(nr - 1, table) + ofs);
69+
miv = take2(sha1 + ofs);
70+
if (miv < lov)
71+
return -1;
72+
if (hiv < miv)
73+
return -1 - nr;
74+
if (lov != hiv) {
75+
/*
76+
* At this point miv could be equal
77+
* to hiv (but sha1 could still be higher);
78+
* the invariant of (mi < hi) should be
79+
* kept.
80+
*/
81+
mi = (nr - 1) * (miv - lov) / (hiv - lov);
82+
if (lo <= mi && mi < hi)
83+
break;
84+
die("oops");
85+
}
86+
}
87+
if (18 <= ofs)
88+
die("cannot happen -- lo and hi are identical");
89+
}
90+
91+
do {
92+
int cmp;
93+
cmp = hashcmp(fn(mi, table), sha1);
94+
if (!cmp)
95+
return mi;
96+
if (cmp > 0)
97+
hi = mi;
98+
else
99+
lo = mi + 1;
100+
mi = (hi + lo) / 2;
101+
} while (lo < hi);
102+
return -lo-1;
103+
}
104+
4105
/*
5106
* Conventional binary search loop looks like this:
6107
*

sha1-lookup.h

+7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
11
#ifndef SHA1_LOOKUP_H
22
#define SHA1_LOOKUP_H
33

4+
/*
 * Accessor callback used by sha1_pos(): given an element "index"
 * and the caller's opaque "table", it returns a pointer to the
 * sha1 of that element.
 */
typedef const unsigned char *sha1_access_fn(size_t index, void *table);
5+
6+
extern int sha1_pos(const unsigned char *sha1,
7+
void *table,
8+
size_t nr,
9+
sha1_access_fn fn);
10+
411
extern int sha1_entry_pos(const void *table,
512
size_t elem_size,
613
size_t key_offset,

0 commit comments

Comments
 (0)