forked from robertdavidgraham/masscan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmassip-rangesv4.c
1385 lines (1164 loc) · 43.4 KB
/
massip-rangesv4.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
IPv4 and port ranges
This is one of the more integral concepts to how masscan works internally.
We combine all the input addresses and address ranges into a sorted list
of 'target' IP addresses. This allows us to enumerate all the addresses
in order by incrementing a simple index. It is that index that we randomize
in order to produce random output, but internally, everything is sorted.
Sorting the list allows us to remove duplicates. It also allows us to
apply the 'excludes' directly to the input list. In other words, other
scanners typically work by selecting an IP address at random, then checking
to see if it's been excluded, then skipping it. In this scanner, however,
we remove all the excluded address from the targets list before we start
scanning.
This module has been tuned to support mass lists of millions of target
IPv4 addresses and excludes. This has required:
- a fast way to parse the address from a file (see range-file.c)
- fast sort (just using qsort() from the standard C library)
- fast application of exludes, using an optimal O(n + m) linear
algorithm, where 'n' is the number of targets, and 'm' is the
number of excluded ranges.
Large lists can still take a bit to process. On a fast server with
7-million input ranges/addresses and 5000 exclude ranges/addresses,
it takes almost 3 seconds to process everything before starting.
*/
#include "massip-rangesv4.h"
#include "massip-port.h"
#include "logger.h"
#include "util-bool.h"
#include "util-malloc.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#define BUCKET_COUNT 16
#define REGRESS(x) if (!(x)) return (fprintf(stderr, "regression failed %s:%d\n", __FILE__, __LINE__)|1)
/* An invalid range, where begin comes after the end */
static struct Range INVALID_RANGE = {2,1};
/***************************************************************************
* Does a linear search to see if the list contains the address/port.
* FIXME: This should be upgraded to a binary search. However, we don't
* really use it in any performance critical code, so it's okay
* as a linear search.
***************************************************************************/
int
rangelist_is_contains(const struct RangeList *targets, unsigned addr)
{
unsigned i;
for (i=0; i<targets->count; i++) {
struct Range *range = &targets->list[i];
if (range->begin <= addr && addr <= range->end)
return 1;
}
return 0;
}
/***************************************************************************
* Returns the first CIDR range (which can be specified with prefix bits)
* that fits within the input range. For example, consider the range
* [10.0.0.4->10.0.0.255]. This does't match the bigger CIDR range.
* The first range that would fit would be [10.0.0.04/30], or
* [10.0.0.4->10.0.0.7].
*
* Using this function allows us to decompose
***************************************************************************/
struct Range
range_first_cidr(const struct Range range, unsigned *prefix_bits) {
struct Range result = {range.begin, range.end};
unsigned zbits = 0;
/* Kludge: Special Case:
* All inputs work but the boundary case of [0.0.0.0/0] or
* [0.0.0.0-255.255.255.255]. I can't be bothered to figure out
* why the algorithm doesn't work with this range, so I'm just
* going to special case it here*/
if (range.begin == 0 && range.end == 0xFFFFffff) {
if (prefix_bits != NULL)
*prefix_bits = 0;
return range;
}
/* Count the number of trailing/suffix zeros, which may be range
* from none (0) to 32 (all bits are 0) */
for (zbits = 0; zbits <= 32; zbits++) {
if ((range.begin & (1<<zbits)) != 0)
break;
}
/* Now search for the largest CIDR range that starts with this
* begining address that fits within the ending address*/
while (zbits > 0) {
unsigned mask = ~(0xFFFFFFFF << zbits);
if (range.begin + mask > range.end)
zbits--;
else
break;
}
result.begin = range.begin;
result.end = range.begin + ~(0xFFFFffff << zbits);
if (prefix_bits != NULL)
*prefix_bits = 32-zbits;
return result;
}
bool
range_is_cidr(const struct Range range, unsigned *prefix_bits) {
struct Range out = range_first_cidr(range, prefix_bits);
if (out.begin == range.begin && out.end == range.end)
return true;
else {
if (prefix_bits != NULL)
*prefix_bits = 0xFFFFFFFF;
return false;
}
}
/***************************************************************************
* Selftest for the above function.
***************************************************************************/
static int
selftest_range_first_cidr(void) {
static struct {
struct Range in;
struct Range out;
unsigned prefix_bits;
} tests[] = {
{{0x00000000, 0xffffffff}, {0x00000000, 0xffffffff}, 0},
{{0x00000001, 0xffffffff}, {0x00000001, 0x00000001}, 32},
{{0xffffffff, 0xffffffff}, {0xffffffff, 0xffffffff}, 32},
{{0xfffffffe, 0xfffffffe}, {0xfffffffe, 0xfffffffe}, 32},
{{0x0A000000, 0x0A0000Ff}, {0x0A000000, 0x0A0000ff}, 24},
{{0x0A0000ff, 0x0A0000Ff}, {0x0A0000ff, 0x0A0000ff}, 32},
{{0x0A000000, 0x0A0000Ff}, {0x0A000000, 0x0A0000Ff}, 24},
{{0x0A000001, 0x0A0000Fe}, {0x0A000001, 0x0A000001}, 32},
{{0x0A000008, 0x0A0000Fe}, {0x0A000008, 0x0A00000f}, 29},
{{0x0A000080, 0x0A0000Fe}, {0x0A000080, 0x0A0000bf}, 26},
{{0x0A0000c0, 0x0A0000Fe}, {0x0A0000c0, 0x0A0000df}, 27},
{{0x0A0000c1, 0x0A0000Fe}, {0x0A0000c1, 0x0A0000c1}, 32},
{{0x0A0000fe, 0x0A0000Fe}, {0x0A0000fe, 0x0A0000fe}, 32},
{{0,0}, {0,0}}
};
size_t i;
for (i=0; tests[i].in.end != 0; i++) {
unsigned prefix_bits = 0xFFFFFFFF;
struct Range out = range_first_cidr(tests[i].in, &prefix_bits);
if (out.begin != tests[i].out.begin
|| out.end != tests[i].out.end
|| prefix_bits != tests[i].prefix_bits) {
fprintf(stderr, "[%u] 0x%08x->0x%08x /%u 0x%08x->0x%08x /%u\n",
(unsigned)i,
out.begin,
out.end,
prefix_bits,
tests[i].out.begin,
tests[i].out.end,
tests[i].prefix_bits);
return 1;
}
}
return 0;
}
/***************************************************************************
* Test if two ranges overlap.
* FIXME: I need to change this so that it (a) doesn't trigger on invalid
* ranges (those where begin>end) and (b) use a simpler algorithm
***************************************************************************/
static int
range_is_overlap(struct Range lhs, struct Range rhs)
{
if (lhs.begin < rhs.begin) {
if (lhs.end == 0xFFFFFFFF || lhs.end + 1 >= rhs.begin)
return 1;
}
if (lhs.begin >= rhs.begin) {
if (lhs.end <= rhs.end)
return 1;
}
if (rhs.begin < lhs.begin) {
if (rhs.end == 0xFFFFFFFF || rhs.end + 1 >= lhs.begin)
return 1;
}
if (rhs.begin >= lhs.begin) {
if (rhs.end <= lhs.end)
return 1;
}
return 0;
}
/***************************************************************************
* Combine two ranges, such as when they overlap.
***************************************************************************/
static void
range_combine(struct Range *lhs, struct Range rhs)
{
if (lhs->begin > rhs.begin)
lhs->begin = rhs.begin;
if (lhs->end < rhs.end)
lhs->end = rhs.end;
}
/***************************************************************************
* Callback for qsort() for comparing two ranges
***************************************************************************/
static int
range_compare(const void *lhs, const void *rhs)
{
struct Range *left = (struct Range *)lhs;
struct Range *right = (struct Range *)rhs;
if (left->begin < right->begin)
return -1;
else if (left->begin > right->begin)
return 1;
else
return 0;
}
/***************************************************************************
***************************************************************************/
static void
rangelist_remove_at(struct RangeList *targets, size_t index)
{
memmove(&targets->list[index],
&targets->list[index+1],
(targets->count - index) * sizeof(targets->list[index])
);
targets->count--;
}
/***************************************************************************
***************************************************************************/
void
rangelist_sort(struct RangeList *targets)
{
size_t i;
struct RangeList newlist = {0};
unsigned original_count = targets->count;
/* Empty lists are, of course, sorted. We need to set this
* to avoid an error later on in the code which asserts that
* the lists are sorted */
if (targets->count == 0) {
targets->is_sorted = 1;
return;
}
/* If it's already sorted, then skip this */
if (targets->is_sorted) {
return;
}
/* First, sort the list */
LOG(3, "[+] range:sort: sorting...\n");
qsort( targets->list, /* the array to sort */
targets->count, /* number of elements to sort */
sizeof(targets->list[0]), /* size of element */
range_compare);
/* Second, combine all overlapping ranges. We do this by simply creating
* a new list from a sorted list, so we don't have to remove things in the
* middle when collapsing overlapping entries together, which is painfully
* slow. */
LOG(3, "[+] range:sort: combining...\n");
for (i=0; i<targets->count; i++) {
rangelist_add_range(&newlist, targets->list[i].begin, targets->list[i].end);
}
LOG(3, "[+] range:sort: combined from %u elements to %u elements\n", original_count, newlist.count);
free(targets->list);
targets->list = newlist.list;
targets->count = newlist.count;
newlist.list = 0;
LOG(2, "[+] range:sort: done...\n");
targets->is_sorted = 1;
}
/***************************************************************************
* Add the IPv4 range to our list of ranges.
***************************************************************************/
void
rangelist_add_range(struct RangeList *targets, unsigned begin, unsigned end)
{
struct Range range;
range.begin = begin;
range.end = end;
/* auto-expand the list if necessary */
if (targets->count + 1 >= targets->max) {
targets->max = targets->max * 2 + 1;
targets->list = REALLOCARRAY(targets->list, targets->max, sizeof(targets->list[0]));
}
/* If empty list, then add this one */
if (targets->count == 0) {
targets->list[0] = range;
targets->count++;
targets->is_sorted = 1;
return;
}
/* If new range overlaps the last range in the list, then combine it
* rather than appending it. This is an optimization for the fact that
* we often read in sequential addresses */
if (range_is_overlap(targets->list[targets->count - 1], range)) {
range_combine(&targets->list[targets->count - 1], range);
targets->is_sorted = 0;
return;
}
/* append to the end of our list */
targets->list[targets->count] = range;
targets->count++;
targets->is_sorted = 0;
}
/** Use this when adding TCP ports, to avoid the comoplication of how
* ports are stored */
void
rangelist_add_range_tcp(struct RangeList *targets, unsigned begin, unsigned end) {
rangelist_add_range(targets,
Templ_TCP + begin,
Templ_TCP + end);
}
/** Use this when adding UDP ports, to avoid the comoplication of how
* ports are stored */
void
rangelist_add_range_udp(struct RangeList *targets, unsigned begin, unsigned end) {
rangelist_add_range(targets,
Templ_UDP + begin,
Templ_UDP + end);
}
/***************************************************************************
* This is the "free" function for the list, freeing up any memory we've
* allocated.
***************************************************************************/
void
rangelist_remove_all(struct RangeList *targets)
{
free(targets->list);
free(targets->picker);
memset(targets, 0, sizeof(*targets));
}
/***************************************************************************
***************************************************************************/
void
rangelist_merge(struct RangeList *list1, const struct RangeList *list2)
{
unsigned i;
for (i=0; i<list2->count; i++) {
rangelist_add_range(list1, list2->list[i].begin, list2->list[i].end);
}
rangelist_sort(list1);
}
/***************************************************************************
* This searches a range list and removes that range of IP addresses, if
* they exist. Since the input range can overlap multiple entries, then
* more than one entry can be removed, or truncated. Since the range
* can be in the middle of an entry in the list, it can actually increase
* the list size by one, as that entry is split into two entries.
* DEPRECATED: this function is deprecated, and will be removed at some
* point. It's only remaining in order to serve as a regression test for
* its replacement.
***************************************************************************/
static void
rangelist_remove_range(struct RangeList *targets, unsigned begin, unsigned end)
{
unsigned i;
struct Range x;
x.begin = begin;
x.end = end;
/* See if the range overlaps any exist range already in the
* list */
for (i = 0; i < targets->count; i++) {
if (!range_is_overlap(targets->list[i], x))
continue;
/* If the removal-range wholly covers the range, delete
* it completely */
if (begin <= targets->list[i].begin && end >= targets->list[i].end) {
rangelist_remove_at(targets, i);
i--;
continue;
}
/* If the removal-range bisects the target-rage, truncate
* the lower end and add a new high-end */
if (begin > targets->list[i].begin && end < targets->list[i].end) {
struct Range newrange;
newrange.begin = end+1;
newrange.end = targets->list[i].end;
targets->list[i].end = begin-1;
rangelist_add_range(targets, newrange.begin, newrange.end);
i--;
continue;
}
/* If overlap on the lower side */
if (end >= targets->list[i].begin && end < targets->list[i].end) {
targets->list[i].begin = end+1;
}
/* If overlap on the upper side */
if (begin > targets->list[i].begin && begin <= targets->list[i].end) {
targets->list[i].end = begin-1;
}
//assert(!"impossible");
}
}
static void
rangelist_add_range2(struct RangeList *targets, struct Range range)
{
rangelist_add_range(targets, range.begin, range.end);
}
static void
rangelist_remove_range2(struct RangeList *targets, struct Range range)
{
rangelist_remove_range(targets, range.begin, range.end);
}
/***************************************************************************
* Parse an IPv4 address from a line of text, moving the offset forward
* to the first non-IPv4 character
***************************************************************************/
static int
parse_ipv4(const char *line, unsigned *inout_offset, unsigned max, unsigned *ipv4)
{
unsigned offset = *inout_offset;
unsigned result = 0;
unsigned i;
for (i=0; i<4; i++) {
unsigned x = 0;
unsigned digits = 0;
if (offset >= max)
return -4;
if (!isdigit(line[offset]&0xFF))
return -1;
/* clear leading zeros */
while (offset < max && line[offset] == '0')
offset++;
/* parse maximum of 3 digits */
while (offset < max && isdigit(line[offset]&0xFF)) {
x = x * 10 + (line[offset] - '0');
offset++;
if (++digits > 3)
return -2;
}
if (x > 255)
return -5;
result = result * 256 + (x & 0xFF);
if (i == 3)
break;
if (line[offset] != '.')
return -3;
offset++; /* skip dot */
}
*inout_offset = offset;
*ipv4 = result;
return 0; /* parse OK */
}
/****************************************************************************
* Parse from text an IPv4 address range. This can be in one of several
* formats:
* - '192.168.1.1" - a single address
* - '192.168.1.0/24" - a CIDR spec
* - '192.168.1.0-192.168.1.255' - a range
* @param line
* Part of a line of text, probably read from a commandline or conf
* file.
* @param inout_offset
* On input, the offset from the start of the line where the address
* starts. On output, the offset of the first character after the
* range, or equal to 'max' if the line prematurely ended.
* @param max
* The maximum length of the line.
* @return
* The first and last address of the range, inclusive.
****************************************************************************/
struct Range
range_parse_ipv4(const char *line, unsigned *inout_offset, unsigned max)
{
unsigned offset;
struct Range result;
static const struct Range badrange = {0xFFFFFFFF, 0};
int err;
if (line == NULL)
return badrange;
if (inout_offset == NULL) {
inout_offset = &offset;
offset = 0;
max = (unsigned)strlen(line);
} else
offset = *inout_offset;
/* trim whitespace */
while (offset < max && isspace(line[offset]&0xFF))
offset++;
/* get the first IP address */
err = parse_ipv4(line, &offset, max, &result.begin);
if (err) {
return badrange;
}
result.end = result.begin;
/* trim whitespace */
while (offset < max && isspace(line[offset]&0xFF))
offset++;
/* If only one IP address, return that */
if (offset >= max)
goto end;
/*
* Handle CIDR address of the form "10.0.0.0/8"
*/
if (line[offset] == '/') {
uint64_t prefix = 0;
uint64_t mask = 0;
unsigned digits = 0;
/* skip slash */
offset++;
if (!isdigit(line[offset]&0xFF)) {
return badrange;
}
/* strip leading zeroes */
while (offset<max && line[offset] == '0')
offset++;
/* parse decimal integer */
while (offset<max && isdigit(line[offset]&0xFF)) {
prefix = prefix * 10 + (line[offset++] - '0');
if (++digits > 2)
return badrange;
}
if (prefix > 32)
return badrange;
/* Create the mask from the prefix */
mask = 0xFFFFFFFF00000000ULL >> prefix;
/* Mask off any non-zero bits from the start
* TODO print warning */
result.begin &= mask;
/* Set all suffix bits to 1, so that 192.168.1.0/24 has
* an ending address of 192.168.1.255. */
result.end = result.begin | (unsigned)~mask;
goto end;
}
/*
* Handle a dashed range like "10.0.0.100-10.0.0.200"
*/
if (offset<max && line[offset] == '-') {
unsigned ip;
offset++;
err = parse_ipv4(line, &offset, max, &ip);
if (err)
return badrange;
if (ip < result.begin) {
result.begin = 0xFFFFFFFF;
result.end = 0x00000000;
LOG(0, "err: ending addr %u.%u.%u.%u cannot come before starting addr %u.%u.%u.%u\n",
((ip>>24)&0xFF), ((ip>>16)&0xFF), ((ip>>8)&0xFF), ((ip>>0)&0xFF),
((result.begin>>24)&0xFF), ((result.begin>>16)&0xFF), ((result.begin>>8)&0xFF), ((result.begin>>0)&0xFF)
);
} else
result.end = ip;
goto end;
}
end:
*inout_offset = offset;
return result;
}
/***************************************************************************
* This is the old algorithm for applying exclude ranges, very slow
* for large lists. We keep it around for verifying correctness of the
* new replacement algorithm.
***************************************************************************/
static void
rangelist_exclude2( struct RangeList *targets,
const struct RangeList *excludes)
{
unsigned i;
for (i=0; i<excludes->count; i++) {
struct Range range = excludes->list[i];
rangelist_remove_range(targets, range.begin, range.end);
}
/* Since chopping up large ranges can split ranges, this can
* grow the list so we need to re-sort it */
rangelist_sort(targets);
}
/**
* Applies the (presumably overlapping) exclude range to the target. This can have
* four outcomes:
* - there is no overlap, in which case 'target' is unchanged, and 'split'
* is set to INVALID.
* - the entire target is excluded, in which case it's set to INVALID.
* - the overlap is at the beginning, in which case the 'begin' is increased.
* - the overlap is at the end, in which case 'end' is reduced.
* - the overlap is in the middle, in which case the target is split
* in two, with 'target' becoming the low addresses, and 'split' becoming
* the high addresses.
*/
static void
range_apply_exclude(const struct Range exclude, struct Range *target, struct Range *split)
{
/* Set 'split' to invalid to start with */
split->begin = 2;
split->end = 1;
/* Case 1: no overlap */
if (target->begin > exclude.end || target->end < exclude.begin) {
return;
}
/* Case 2: complete overlap, mark target as invalid and return */
if (target->begin >= exclude.begin && target->end <= exclude.end) {
target->begin = 2;
target->end = 1;
return;
}
/* Case 3: overlap at start */
if (target->begin >= exclude.begin && target->end > exclude.end) {
target->begin = exclude.end + 1;
return;
}
/* Case 4: overlap at end */
if (target->begin < exclude.begin && target->end <= exclude.end) {
target->end = exclude.begin - 1;
return;
}
/* Case 5: this range needs to be split */
if (target->begin < exclude.begin && target->end > exclude.end) {
split->end = target->end;
split->begin = exclude.end + 1;
target->end = exclude.begin - 1;
return;
}
/* No other condition should be possible */
assert(!"possible");
}
/***************************************************************************
***************************************************************************/
int
range_is_valid(struct Range range)
{
return range.begin <= range.end;
}
/***************************************************************************
* Apply the exclude ranges, which means removing everything from "targets"
* that's also in "exclude". This can make the target list even bigger
* as individually excluded address chop up large ranges.
***************************************************************************/
void
rangelist_exclude( struct RangeList *targets,
struct RangeList *excludes)
{
unsigned i;
unsigned x;
struct RangeList newlist = {0};
/* Both lists must be sorted */
rangelist_sort(targets);
rangelist_sort(excludes);
/* Go through all target ranges, apply excludes to them
* (which may split into two ranges), and add them to
* the new target list */
x = 0;
for (i=0; i<targets->count; i++) {
struct Range range = targets->list[i];
/* Move the exclude forward until we find a potentially
* overlapping candidate */
while (x < excludes->count && excludes->list[x].end < range.begin)
x++;
/* Keep applying excludes to this range as long as there are overlaps */
while (x < excludes->count && excludes->list[x].begin <= range.end) {
struct Range split = INVALID_RANGE;
range_apply_exclude(excludes->list[x], &range, &split);
/* If there is a split, then add the original range to our list
* and then set that range to the split-ed portion */
if (range_is_valid(split)) {
rangelist_add_range(&newlist, range.begin, range.end);
memcpy(&range, &split, sizeof(range));
}
if (excludes->list[x].begin > range.end)
break;
x++;
}
/* If the range hasn't been completely excluded, then add the remnants */
if (range_is_valid(range)) {
rangelist_add_range(&newlist, range.begin, range.end);
}
}
/* Now free the old list and move over the new list */
free(targets->list);
targets->list = newlist.list;
targets->count = newlist.count;
newlist.list = NULL;
newlist.count = 0;
/* Since chopping up large ranges can split ranges, this can
* grow the list so we need to re-sort it */
rangelist_sort(targets);
}
/***************************************************************************
* Counts the total number of addresses in all the ranges combined.
* For 0.0.0.0/0, this will be 0x100000000, which means we have to use a
* larger number than 32-bit to return the result. This assumes that
* all overlaps have been resolved in the list (i.e. it's been sorted).
***************************************************************************/
uint64_t
rangelist_count(const struct RangeList *targets)
{
unsigned i;
uint64_t result = 0;
for (i=0; i<targets->count; i++) {
result += (uint64_t)targets->list[i].end - (uint64_t)targets->list[i].begin + 1UL;
}
return result;
}
/***************************************************************************
* Get's the indexed port/address.
*
* Note that this requires a search of all the ranges. Currently, this is
* done by a learn search of the ranges. This needs to change, because
* once we start adding in a lot of "exclude ranges", the address space
* will get fragmented, and the linear search will take too long.
***************************************************************************/
static unsigned
rangelist_pick_linearsearch(const struct RangeList *targets, uint64_t index)
{
unsigned i;
for (i=0; i<targets->count; i++) {
uint64_t range = (uint64_t)targets->list[i].end - (uint64_t)targets->list[i].begin + 1UL;
if (index < range)
return (unsigned)(targets->list[i].begin + index);
else
index -= range;
}
assert(!"end of list");
return 0;
}
/***************************************************************************
***************************************************************************/
unsigned
rangelist_pick(const struct RangeList *targets, uint64_t index)
{
unsigned maxmax = targets->count;
unsigned min = 0;
unsigned max = targets->count;
unsigned mid;
const unsigned *picker = targets->picker;
if (!targets->is_sorted)
rangelist_sort((struct RangeList *)targets);
assert(targets->is_sorted);
if (picker == NULL) {
/* optimization wasn't done */
return rangelist_pick_linearsearch(targets, index);
}
for (;;) {
mid = min + (max-min)/2;
if (index < picker[mid]) {
max = mid;
continue;
} if (index >= picker[mid]) {
if (mid + 1 == maxmax)
break;
else if (index < picker[mid+1])
break;
else
min = mid+1;
}
}
return (unsigned)(targets->list[mid].begin + (index - picker[mid]));
}
/***************************************************************************
* The normal "pick" function is a linear search, which is slow when there
* are a lot of ranges. Therefore, the "pick2" creates sort of binary
* search that'll be a lot faster. We choose "binary search" because
* it's the most cache-efficient, having the least overhead to fit within
* the cache.
***************************************************************************/
void
rangelist_optimize(struct RangeList *targets)
{
unsigned *picker;
unsigned i;
unsigned total = 0;
if (targets->count == 0)
return;
/* This technique only works when the targets are in
* ascending order */
if (!targets->is_sorted)
rangelist_sort(targets);
if (targets->picker)
free(targets->picker);
picker = REALLOCARRAY(NULL, targets->count, sizeof(*picker));
for (i=0; i<targets->count; i++) {
picker[i] = total;
total += targets->list[i].end - targets->list[i].begin + 1;
}
targets->picker = picker;
}
/***************************************************************************
* Provide my own rand() simply to avoid static-analysis warning me that
* 'rand()' is unrandom, when in fact we want the non-random properties of
* rand() for regression testing.
***************************************************************************/
static unsigned
r_rand(unsigned *seed)
{
static const unsigned a = 214013;
static const unsigned c = 2531011;
*seed = (*seed) * a + c;
return (*seed)>>16 & 0x7fff;
}
/***************************************************************************
***************************************************************************/
static int
regress_pick2()
{
unsigned i;
unsigned seed = 0;
/*
* Run 100 randomized regression tests
*/
for (i=0; i<100; i++) {
unsigned j;
unsigned num_targets;
unsigned begin = 0;
unsigned end;
struct RangeList targets[1] = {{0}};
struct RangeList duplicate[1] = {{0}};
unsigned range;
/* Create a new target list */
memset(targets, 0, sizeof(targets[0]));
/* fill the target list with random ranges */
num_targets = r_rand(&seed)%5 + 1;
for (j=0; j<num_targets; j++) {
begin += r_rand(&seed)%10;
end = begin + r_rand(&seed)%10;
rangelist_add_range(targets, begin, end);
}
rangelist_sort(targets);
range = (unsigned)rangelist_count(targets);
/* Optimize for faster 'picking' addresses from an index */
rangelist_optimize(targets);
/* Duplicate the targetlist using the picker */
memset(duplicate, 0, sizeof(duplicate[0]));
for (j=0; j<range; j++) {
unsigned x;
x = rangelist_pick(targets, j);
rangelist_add_range(duplicate, x, x);
}
rangelist_sort(duplicate);
/* at this point, the two range lists should be identical */
REGRESS(targets->count == duplicate->count);
REGRESS(memcmp(targets->list, duplicate->list, targets->count*sizeof(targets->list[0])) == 0);
rangelist_remove_all(targets);
rangelist_remove_all(duplicate);
}
return 0;
}
/***************************************************************************
* This returns a character pointer where parsing ends so that it can
* handle multiple stuff on the same line
***************************************************************************/
const char *
rangelist_parse_ports(struct RangeList *ports, const char *string, unsigned *is_error, unsigned proto_offset)
{
char *p = (char*)string;
unsigned tmp = 0;
if (is_error == NULL)
is_error = &tmp;
*is_error = 0;
while (*p) {
unsigned port;
unsigned end;