-
-
Notifications
You must be signed in to change notification settings - Fork 772
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement O(n) sort for inverted selection #1904
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1724,12 +1724,40 @@ static char *findinsel(char *startpos, int len) | |
} | ||
} | ||
|
||
static int markcmp(const void *va, const void *vb) | ||
{ | ||
const selmark *ma = (selmark*)va; | ||
const selmark *mb = (selmark*)vb; | ||
/* | ||
* Sort selection markers by their starting position in O(n) time. Use one byte | ||
* radix and eight passes (starting from the LSB). | ||
*/ | ||
static void sort_marked(selmark *data, selmark *scratch, size_t nelem) | ||
{ | ||
size_t cnt[256]; | ||
size_t idx[256]; | ||
size_t value; | ||
size_t i, j; | ||
void *tmp; | ||
int b; | ||
|
||
for (b = 0; b < 8; b++) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The magic 8 being? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I had intended to comment on this. I think it's assuming EDIT: just noticed the comment down below, looks like you figured it out :D |
||
memset(cnt, 0, sizeof(cnt)); | ||
for (i = 0; i < nelem; i++) { | ||
value = (size_t)data[i].startpos; | ||
cnt[(value >> (b * 8)) & 0xFF]++; | ||
} | ||
|
||
return ma->startpos - mb->startpos; | ||
idx[0] = 0; | ||
for (i = 1; i < 256; i++) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We mostly use |
||
idx[i] = idx[i - 1] + cnt[i - 1]; | ||
|
||
for (i = 0; i < nelem; i++) { | ||
value = (size_t)data[i].startpos; | ||
j = idx[(value >> (b * 8)) & 0xFF]++; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Only here do we learn what But this also assumes that Adjust the names and the limit for |
||
memcpy(&scratch[j], &data[i], sizeof(selmark)); | ||
} | ||
|
||
tmp = data; | ||
data = scratch; | ||
scratch = tmp; | ||
} | ||
} | ||
|
||
/* scanselforpath() must be called before calling this */ | ||
|
@@ -1754,10 +1782,13 @@ static void invertselbuf(const int pathlen) | |
int i, nmarked = 0, prev = 0; | ||
struct entry *dentp; | ||
bool scan = FALSE; | ||
selmark *marked = malloc(nselected * sizeof(selmark)); | ||
selmark *marked = calloc(nselected, sizeof(*marked)); | ||
selmark *scratch = calloc(nselected, sizeof(*scratch)); | ||
Comment on lines
-1757
to
+1786
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we replacing the actual struct There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, I just realized that it's changed to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Yeah, that's fine. For reference C11 6.5.3.4:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The more you know. I'm still more for
|
||
|
||
if (!marked) { | ||
if (marked == NULL || scratch == NULL) { | ||
printwarn(NULL); | ||
free(marked); | ||
free(scratch); | ||
return; | ||
} | ||
|
||
|
@@ -1810,7 +1841,7 @@ static void invertselbuf(const int pathlen) | |
* With entries sorted we can merge adjacent ones allowing us to | ||
* move them in a single go. | ||
*/ | ||
qsort(marked, nmarked, sizeof(selmark), &markcmp); | ||
sort_marked(marked, scratch, nselected); | ||
|
||
/* Some files might be adjacent. Merge them into a single entry */ | ||
for (i = 1; i < nmarked; ++i) { | ||
|
@@ -1850,6 +1881,7 @@ static void invertselbuf(const int pathlen) | |
} | ||
|
||
free(marked); | ||
free(scratch); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should |
||
|
||
/* Buffer size adjustment */ | ||
selbufpos -= shrinklen; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd prefer these names to be a bit more descriptive. What is
b
?cnt
andidx
make sense if you know what radix sort does, so that's more or less fine.i
is an iteration variable that's fine, butj
isn't, it's used for something completely unrelated.Also small nitpick, we know the type of
tmp
, it should beselmark *
just asdata
andscratch
are.