forked from wuxb45/wormhole
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlib.h
681 lines (503 loc) · 13.7 KB
/
lib.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
/*
* Copyright (c) 2016--2021 Wu, Xingbo <[email protected]>
*
* All rights reserved. No warranty, explicit or implicit, provided.
*/
#pragma once
// includes {{{
// C headers
#include <errno.h>
#include <inttypes.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
// POSIX headers
#include <fcntl.h>
#include <pthread.h>
#include <unistd.h>
// Linux headers
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
// SIMD
#if defined(__x86_64__)
#include <x86intrin.h>
#elif defined(__aarch64__)
#include <arm_acle.h>
#include <arm_neon.h>
#endif
// }}} includes
#ifdef __cplusplus
extern "C" {
#endif
// types {{{
// Fixed-width integer aliases. Every width is checked at compile time below,
// so a data model where e.g. `long` is not 8 bytes fails to build.
// s8 is spelled `signed char` on purpose: plain `char` has
// implementation-defined signedness (it is unsigned under the AArch64 Linux
// ABI, which this header accepts), so `typedef char s8` is not a signed type
// on every supported target.
typedef signed char s8;
typedef short s16;
typedef int s32;
typedef long s64;
typedef __int128_t s128;
static_assert((s8)-1 < 0, "s8 must be signed");
static_assert(sizeof(s8) == 1, "sizeof(s8)");
static_assert(sizeof(s16) == 2, "sizeof(s16)");
static_assert(sizeof(s32) == 4, "sizeof(s32)");
static_assert(sizeof(s64) == 8, "sizeof(s64)");
static_assert(sizeof(s128) == 16, "sizeof(s128)");
// Unsigned counterparts of the aliases above.
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long u64;
typedef __uint128_t u128;
static_assert(sizeof(u8) == 1, "sizeof(u8)");
static_assert(sizeof(u16) == 2, "sizeof(u16)");
static_assert(sizeof(u32) == 4, "sizeof(u32)");
static_assert(sizeof(u64) == 8, "sizeof(u64)");
static_assert(sizeof(u128) == 16, "sizeof(u128)");
// SIMD vector aliases. m128 exists on both accepted architectures; m256 and
// m512 are only declared on x86_64 builds that enable AVX2 / AVX-512F.
// Any other architecture is rejected at preprocessing time.
#ifdef __x86_64__
typedef __m128i m128;
#ifdef __AVX2__
typedef __m256i m256;
#endif // __AVX2__
#ifdef __AVX512F__
typedef __m512i m512;
#endif // __AVX512F__
#elif defined(__aarch64__)
typedef uint8x16_t m128;
#else
#error Need x86_64 or AArch64.
#endif
// }}} types
// defs {{{
// Branch hints: each macro evaluates to its argument's value while telling
// the compiler which value (1 or 0) to expect.
#define likely(cond) __builtin_expect(cond, 1)
#define unlikely(cond) __builtin_expect(cond, 0)
// ANSI terminal colors: TERMCLR(31) expands to the string "\x1b[31m".
// Code layout: 3X = foreground, 4X = background,
//              9X = light foreground, 10X = light background,
// where X picks the color:
//   0 black, 1 red, 2 green, 3 yellow, 4 blue, 5 magenta, 6 cyan, 7 white
#define TERMCLR(code) "\x1b[" #code "m"
// }}} defs
// const {{{
// 4096 as an unsigned long constant (presumably the base page size in bytes).
#define PGSZ (4096UL)
// }}} const
// math {{{
// Integer math helpers; only declared here, defined out of line.
extern u64 mhash64(u64 v);
extern u32 mhash32(u32 v);
extern u64 gcd64(u64 a, u64 b);
// }}} math
// random {{{
// Random-number helpers; only declared here, defined out of line.
extern u64 random_u64(void);
extern void srandom_u64(u64 seed);
extern double random_double(void);
// }}} random
// timing {{{
// Clock helpers in two flavors: u64 nanoseconds and double seconds.
// NOTE(review): units are taken from the _nsec/_sec suffixes; confirm in the definitions.
extern u64 time_nsec(void);
extern double time_sec(void);
extern u64 time_diff_nsec(u64 last);
extern double time_diff_sec(double last);
// Both writers receive a caller-owned buffer `str` of `size` bytes.
extern void time_stamp(char *str, size_t size);
extern void time_stamp2(char *str, size_t size);
// }}} timing
// cpucache {{{
// CPU pause/fence primitives; only declared here, defined out of line.
extern void cpu_pause(void);
extern void cpu_mfence(void);
extern void cpu_cfence(void);
// Prefetch hints; the pointed-to data is never written through `ptr`.
extern void cpu_prefetch0(const void *ptr);
extern void cpu_prefetch1(const void *ptr);
extern void cpu_prefetch2(const void *ptr);
extern void cpu_prefetch3(const void *ptr);
extern void cpu_prefetchw(const void *ptr);
// }}} cpucache
// crc32c {{{
// Fold one fixed-width value into a running crc.
extern u32 crc32c_u8(u32 crc, u8 v);
extern u32 crc32c_u16(u32 crc, u16 v);
extern u32 crc32c_u32(u32 crc, u32 v);
extern u32 crc32c_u64(u32 crc, u64 v);
// Buffer variants: fold nr bytes of buf into crc.
// precondition: 1 <= nr <= 3
extern u32 crc32c_inc_123(const u8 *buf, u32 nr, u32 crc);
// precondition: nr % 4 == 0
extern u32 crc32c_inc_x4(const u8 *buf, u32 nr, u32 crc);
extern u32 crc32c_inc(const u8 *buf, u32 nr, u32 crc);
// }}} crc32c
// debug {{{
extern void debug_break(void);
extern void debug_backtrace(void);
extern void watch_u64_usr1(u64 *ptr);
// debug_assert is a real function in debug builds; with NDEBUG it becomes a
// no-op macro, so its argument is not evaluated at all.
#ifdef NDEBUG
#define debug_assert(expr) ((void)0)
#else
extern void debug_assert(bool v);
#endif
// The two debug_die* functions are declared noreturn.
__attribute__((noreturn)) extern void debug_die(void);
__attribute__((noreturn)) extern void debug_die_perror(void);
extern void debug_dump_maps(FILE *out);
extern bool debug_perf_switch(void);
// }}} debug
// mm {{{
// alloc_fail is only declared in builds compiled with -DALLOCFAIL.
#ifdef ALLOCFAIL
extern bool alloc_fail(void);
#endif
extern void *xalloc(size_t align, size_t size);
extern void *yalloc(size_t size);
extern void **malloc_2d(size_t nr, size_t size);
extern void **calloc_2d(size_t nr, size_t size);
extern void pages_unmap(void *ptr, size_t size);
extern void pages_lock(void *ptr, size_t size);
/* hugepages */
// build with -DVALGRIND_MEMCHECK to force the posix allocators
extern void *pages_alloc_4kb(size_t nr_4kb);
extern void *pages_alloc_2mb(size_t nr_2mb);
extern void *pages_alloc_1gb(size_t nr_1gb);
extern void *pages_alloc_best(size_t size, bool try_1gb, u64 *size_out);
// }}} mm
// process/thread {{{
extern void thread_get_name(pthread_t pt, char *name, size_t len);
extern void thread_set_name(pthread_t pt, const char *name);
extern long process_get_rss(void);
extern u32 process_affinity_count(void);
extern u32 process_getaffinity_list(u32 max, u32 *cores);
extern void thread_setaffinity_list(u32 nr, const u32 *list);
extern void thread_pin(u32 cpu);
extern u64 process_cpu_time_usec(void);
// The type behind argx depends on args:
//   args == true  -> argx is void **
//   args == false -> argx is void *
extern u64 thread_fork_join(u32 nr, void *(*func)(void *), bool args, void *argx);
extern int thread_create_at(u32 cpu, pthread_t *thread, void *(*start_routine)(void *), void *arg);
// }}} process/thread
// locking {{{
// Spinlock: one opaque 32-bit word; use it only through the functions below.
typedef union {
  u32 opaque;
} spinlock;
extern void spinlock_init(spinlock *lock);
extern void spinlock_lock(spinlock *lock);
extern bool spinlock_trylock(spinlock *lock);
extern void spinlock_unlock(spinlock *lock);
// Reader-writer lock: one opaque 32-bit word; use it only through the functions below.
typedef union {
  u32 opaque;
} rwlock;
extern void rwlock_init(rwlock *lock);
// reader side
extern bool rwlock_trylock_read(rwlock *lock);
// low-priority reader-lock; pair it with rwlock_trylock_write_hp
extern bool rwlock_trylock_read_lp(rwlock *lock);
extern bool rwlock_trylock_read_nr(rwlock *lock, u16 nr);
extern void rwlock_lock_read(rwlock *lock);
extern void rwlock_unlock_read(rwlock *lock);
// writer side
extern bool rwlock_trylock_write(rwlock *lock);
extern bool rwlock_trylock_write_nr(rwlock *lock, u16 nr);
extern void rwlock_lock_write(rwlock *lock);
// high-priority writer: the writer goes first and new readers are blocked
extern bool rwlock_trylock_write_hp(rwlock *lock);
extern bool rwlock_trylock_write_hp_nr(rwlock *lock, u16 nr);
extern void rwlock_lock_write_hp(rwlock *lock);
extern void rwlock_unlock_write(rwlock *lock);
extern void rwlock_write_to_read(rwlock *lock);
// Mutex: 64 bytes of opaque storage; use it only through the functions below.
// The member name is a misspelling of "opaque"; it is left untouched because
// it is part of the visible type.
typedef union {
  u64 opqaue[8];
} mutex;
extern void mutex_init(mutex *lock);
extern void mutex_lock(mutex *lock);
extern bool mutex_trylock(mutex *lock);
extern void mutex_unlock(mutex *lock);
extern void mutex_deinit(mutex *lock);
// }}} locking
// coroutine {{{
extern u64 co_switch_stack(u64 *saversp, u64 newrsp, u64 retval);
// struct co is opaque to users of this header.
struct co;
// creation / reuse
extern struct co *co_create(u64 stacksize, void *func, void *priv, u64 *host);
extern void co_reuse(struct co *co, void *func, void *priv, u64 *host);
extern struct co *co_fork(void *func, void *priv);
extern void *co_priv(void);
// switching; each call carries a u64 retval
extern u64 co_enter(struct co *to, u64 retval);
extern u64 co_switch_to(struct co *to, u64 retval);
extern u64 co_back(u64 retval);
extern void co_exit(u64 retval);
// inspection / teardown
extern bool co_valid(struct co *co);
extern struct co *co_self(void);
extern void co_destroy(struct co *co);
// struct corr is opaque to users of this header.
struct corr;
// create/reuse take a host pointer; link/relink take a previous corr instead
extern struct corr *corr_create(u64 stacksize, void *func, void *priv, u64 *host);
extern struct corr *corr_link(u64 stacksize, void *func, void *priv, struct corr *prev);
extern void corr_reuse(struct corr *co, void *func, void *priv, u64 *host);
extern void corr_relink(struct corr *co, void *func, void *priv, struct corr *prev);
extern void corr_enter(struct corr *co);
extern void corr_yield(void);
extern void corr_exit(void);
extern void corr_destroy(struct corr *co);
// }}} coroutine
// bits {{{
// bit reversal
extern u32 bits_reverse_u32(u32 v);
extern u64 bits_reverse_u64(u64 v);
// rotation of v by n
extern u64 bits_rotl_u64(u64 v, u8 n);
extern u64 bits_rotr_u64(u64 v, u8 n);
extern u32 bits_rotl_u32(u32 v, u8 n);
extern u32 bits_rotr_u32(u32 v, u8 n);
// NOTE(review): "p2" presumably means power of two; confirm in the definitions
extern u64 bits_p2_up_u64(u64 v);
extern u32 bits_p2_up_u32(u32 v);
extern u64 bits_p2_down_u64(u64 v);
extern u32 bits_p2_down_u32(u32 v);
// rounding: plain variants take a u8 `power`, the _a variants take a u64 `a`
extern u64 bits_round_up(u64 v, u8 power);
extern u64 bits_round_up_a(u64 v, u64 a);
extern u64 bits_round_down(u64 v, u8 power);
extern u64 bits_round_down_a(u64 v, u64 a);
// }}} bits
// vi128 {{{
// Each width has an estimate/encode/decode triple. encode and decode return a
// pointer of the same kind as the buffer they were handed.
// NOTE(review): presumably the position just past the bytes written/read; confirm.
extern u32 vi128_estimate_u32(u32 v);
extern u8 *vi128_encode_u32(u8 *dst, u32 v);
extern const u8 *vi128_decode_u32(const u8 *src, u32 *out);
extern u32 vi128_estimate_u64(u64 v);
extern u8 *vi128_encode_u64(u8 *dst, u64 v);
extern const u8 *vi128_decode_u64(const u8 *src, u64 *out);
// }}} vi128
// misc {{{
// An 8-byte entry that can be viewed four ways through one union:
//   - e1:          a 16-bit field
//   - e1_64 + e3:  a 16-bit and a 48-bit bitfield packed into one u64
//   - v64:         the raw 64-bit value
//   - ptr:         a pointer
// e1 and e1_64 are meant to alias the same 16 bits; that depends on byte order
// and on how the compiler allocates bitfields, hence the TODO below.
// TODO: only works on little endian?
struct entry13 { // what a beautiful name
union {
u16 e1;
struct { // easy for debugging
u64 e1_64:16;
u64 e3:48;
};
u64 v64;
void * ptr;
};
};
// the whole entry must stay exactly one 64-bit word
static_assert(sizeof(struct entry13) == 8, "sizeof(entry13) != 8");
// entry13 usage:
//   - .e1 and .e3 may be read directly
//   - .e1 may be written directly
//   - use entry13_update() to update the entire entry
// NOTE(review): no entry13_update() is declared in this header;
// entry13_update_e3() below is presumably the function meant. Confirm.
extern struct entry13 entry13(u16 e1, u64 e3);
extern void entry13_update_e3(struct entry13 *e, u64 e3);
extern void *u64_to_ptr(u64 v);
extern u64 ptr_to_u64(const void *ptr);
extern size_t m_usable_size(void *ptr);
extern size_t fdsize(int fd);
extern u32 memlcp(const u8 *p1, const u8 *p2, u32 max);
// printf-style: fmt is argument 2, and the compiler checks the variadic
// arguments (from argument 3 on) against it
__attribute__ ((format (printf, 2, 3)))
extern void logger_printf(int fd, const char *fmt, ...);
// }}} misc
// slab {{{
// struct slab is opaque to users of this header.
struct slab;
extern struct slab *slab_create(u64 obj_size, u64 blk_size);
// NOTE(review): the _safe/_unsafe suffixes presumably refer to thread-safety; confirm.
extern bool slab_reserve_unsafe(struct slab *slab, u64 nr);
extern void *slab_alloc_unsafe(struct slab *slab);
extern void *slab_alloc_safe(struct slab *slab);
extern void slab_free_unsafe(struct slab *slab, void *ptr);
extern void slab_free_safe(struct slab *slab, void *ptr);
extern void slab_free_all(struct slab *slab);
extern u64 slab_get_nalloc(struct slab *slab);
extern void slab_destroy(struct slab *slab);
// }}} slab
// string {{{
// XXX strdec_ and strhex_ functions do not append the trailing '\0' to the output string
// out needs room for at least 10 bytes
extern void strdec_32(void *out, u32 v);
// out needs room for at least 20 bytes
extern void strdec_64(void *out, u64 v);
// out needs room for at least 8 bytes
extern void strhex_32(void *out, u32 v);
// out needs room for at least 16 bytes
extern void strhex_64(void *out, u64 v);
// string-to-integer conversions
extern u64 a2u64(const void *str);
extern u32 a2u32(const void *str);
extern s64 a2s64(const void *str);
extern s32 a2s32(const void *str);
// print len bytes of data to out
extern void str_print_hex(FILE *out, const void *data, u32 len);
extern void str_print_dec(FILE *out, const void *data, u32 len);
// the caller frees the returned pointer (and nothing else) after use
extern char **strtoks(const char *str, const char *delim);
extern u32 strtoks_count(const char *const *toks);
// }}} string
// qsbr {{{
// QSBR vs EBR (Quiescent-State vs Epoch Based Reclamation)
// QSBR: readers just use qsbr_update -> qsbr_update -> ... repeatedly
// EBR: readers use qsbr_update -> qsbr_park -> qsbr_resume -> qsbr_update -> ...
// The advantage of EBR is qsbr_park can happen much earlier than the next qsbr_update
// The disadvantage is the extra cost, a pair of park/resume is used in every iteration
// struct qsbr is opaque to users of this header.
struct qsbr;
// Reader handle, passed to qsbr_register and the per-reader calls below.
// Builds with -DQSBR_DEBUG carry 16 extra u64 words in front of the opaque part.
struct qsbr_ref {
#ifdef QSBR_DEBUG
  u64 debug[16];
#endif
  u64 opaque[3];
};
extern struct qsbr *qsbr_create(void);
// Every READER that accesses the shared data must register with the qsbr first.
extern bool qsbr_register(struct qsbr *q, struct qsbr_ref *qref);
extern void qsbr_unregister(struct qsbr *q, struct qsbr_ref *qref);
// READER: marks the beginning of a critical section; like rcu_read_lock()
extern void qsbr_update(struct qsbr_ref *qref, u64 v);
// READER: temporarily stop accessing the shared data so writers are not blocked.
// Used together with qsbr_update, qsbr_park plays the role of rcu_read_unlock().
// Roughly equivalent to qsbr_unregister, but faster.
extern void qsbr_park(struct qsbr_ref *qref);
// READER: undo a qsbr_park; must be called between qsbr_park and the next qsbr_update.
// Roughly equivalent to qsbr_register, but faster.
extern void qsbr_resume(struct qsbr_ref *qref);
// WRITER: wait until all the readers have announced v=target with qsbr_update
extern void qsbr_wait(struct qsbr *q, u64 target);
extern void qsbr_destroy(struct qsbr *q);
// }}} qsbr
#ifdef __cplusplus
}
#endif
// vim:fdm=marker