Skip to content

Commit 12ee626

Browse files
jordalgo (Jordan Rome) and Jordan Rome authored
Add per-cpu aggregations for min/max (bpftrace#3226)
Similar to what was done for `count` and `sum`, allow `min` and `max` to be used in expressions and properly aggregated in map `for` loops. Co-authored-by: Jordan Rome <[email protected]>
1 parent 32e75a5 commit 12ee626

17 files changed

+1196
-49
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ and this project adheres to
2525
- [#3158](https://github.com/bpftrace/bpftrace/pull/3158)
2626
- Add ability to attach uprobes to inlined functions
2727
- [#3095](https://github.com/bpftrace/bpftrace/pull/3095)
28-
- Enable count/sum map reads in kernel space (implicit casting)
28+
- Enable count, sum, min, and max map reads in kernel space (implicit casting)
2929
- [#3189](https://github.com/bpftrace/bpftrace/pull/3189)
30+
- [#3226](https://github.com/bpftrace/bpftrace/pull/3226)
3031
#### Changed
3132
- Better error message for args in mixed probes
3233
- [#3047](https://github.com/bpftrace/bpftrace/pull/3047)

man/adoc/bpftrace.adoc

+2
Original file line numberDiff line numberDiff line change
@@ -2545,6 +2545,7 @@ Prints:
25452545
* `max(int64 n)`
25462546

25472547
Update the map with `n` if `n` is bigger than the current value held.
2548+
Similar to `count`, this uses a PER_CPU map (fast writes, slow reads).
25482549

25492550
[#map-functions-min]
25502551
=== min
@@ -2553,6 +2554,7 @@ Update the map with `n` if `n` is bigger than the current value held.
25532554
* `min(int64 n)`
25542555

25552556
Update the map with `n` if `n` is smaller than the current value held.
2557+
Similar to `count`, this uses a PER_CPU map (fast writes, slow reads).
25562558

25572559
[#map-functions-stats]
25582560
=== stats

src/ast/irbuilderbpf.cpp

+61-13
Original file line numberDiff line numberDiff line change
@@ -480,14 +480,15 @@ Value *IRBuilderBPF::CreateMapLookupElem(Value *ctx,
480480
return ret;
481481
}
482482

483-
Value *IRBuilderBPF::CreatePerCpuMapSumElems(Value *ctx,
483+
Value *IRBuilderBPF::CreatePerCpuMapAggElems(Value *ctx,
484484
Map &map,
485485
Value *key,
486+
const SizedType &type,
486487
const location &loc,
487488
bool is_aot)
488489
{
489490
/*
490-
* int sum = 0;
491+
* int ret = 0;
491492
* int i = 0;
492493
* while (i < nr_cpus) {
493494
* int * cpu_value = map_lookup_percpu_elem(map, key, i);
@@ -498,25 +499,25 @@ Value *IRBuilderBPF::CreatePerCpuMapSumElems(Value *ctx,
498499
* debug("No cpu found for cpu id: %lu", i) // Mostly for AOT
499500
* break;
500501
* }
501-
* sum += *cpu_value;
502+
* // Get the sum, min, or max value
502503
* i++;
503504
* }
504-
* return sum;
505+
* return ret;
505506
*/
506507

507508
assert(ctx && ctx->getType() == GET_PTR_TY());
508509

509510
const std::string &map_name = map.ident;
510511

511-
AllocaInst *sum = CreateAllocaBPF(getInt64Ty(), "sum");
512+
AllocaInst *ret = CreateAllocaBPF(getInt64Ty(), "ret");
512513
AllocaInst *i = CreateAllocaBPF(getInt32Ty(), "i");
513514

514515
// Set a large upper bound if we don't know the number of cpus
515516
// when generating the instructions
516517
int nr_cpus = is_aot ? 1024 : bpftrace_.get_num_possible_cpus();
517518

518519
CreateStore(getInt32(0), i);
519-
CreateStore(getInt64(0), sum);
520+
CreateStore(getInt64(0), ret);
520521

521522
Function *parent = GetInsertBlock()->getParent();
522523
BasicBlock *while_cond = BasicBlock::Create(module_.getContext(),
@@ -560,10 +561,16 @@ Value *IRBuilderBPF::CreatePerCpuMapSumElems(Value *ctx,
560561
SetInsertPoint(lookup_success_block);
561562
// createMapLookup returns an u8*
562563
auto *cast = CreatePointerCast(call, getInt64Ty()->getPointerTo(), "cast");
563-
// sum += cpu_value;
564-
CreateStore(CreateAdd(CreateLoad(getInt64Ty(), cast),
565-
CreateLoad(getInt64Ty(), sum)),
566-
sum);
564+
565+
if (type.IsSumTy() || type.IsCountTy()) {
566+
createPerCpuSum(ret, cast);
567+
} else if (type.IsMaxTy()) {
568+
createPerCpuMinMax(ret, cast, true);
569+
} else if (type.IsMinTy()) {
570+
createPerCpuMinMax(ret, cast, false);
571+
} else {
572+
LOG(BUG) << "Unsupported map aggregation type: " << type;
573+
}
567574

568575
// ++i;
569576
CreateStore(CreateAdd(CreateLoad(getInt32Ty(), i), getInt32(1)), i);
@@ -603,9 +610,50 @@ Value *IRBuilderBPF::CreatePerCpuMapSumElems(Value *ctx,
603610
SetInsertPoint(while_end);
604611

605612
CreateLifetimeEnd(i);
606-
Value *ret = CreateLoad(getInt64Ty(), sum);
607-
CreateLifetimeEnd(sum);
608-
return ret;
613+
Value *ret_reg = CreateLoad(getInt64Ty(), ret);
614+
CreateLifetimeEnd(ret);
615+
return ret_reg;
616+
}
617+
618+
void IRBuilderBPF::createPerCpuSum(AllocaInst *ret, Value *cpu_value)
619+
{
620+
CreateStore(CreateAdd(CreateLoad(getInt64Ty(), cpu_value),
621+
CreateLoad(getInt64Ty(), ret)),
622+
ret);
623+
}
624+
625+
void IRBuilderBPF::createPerCpuMinMax(AllocaInst *ret,
626+
Value *cpu_value,
627+
bool is_max)
628+
{
629+
Function *parent = GetInsertBlock()->getParent();
630+
BasicBlock *success_block = BasicBlock::Create(module_.getContext(),
631+
"min_max_success",
632+
parent);
633+
BasicBlock *merge_block = BasicBlock::Create(module_.getContext(),
634+
"min_max_merge",
635+
parent);
636+
Value *condition;
637+
638+
if (is_max) {
639+
condition = CreateICmpSGT(CreateLoad(getInt64Ty(), cpu_value),
640+
CreateLoad(getInt64Ty(), ret),
641+
"max_cond");
642+
} else {
643+
condition = CreateICmpSLT(CreateLoad(getInt64Ty(), cpu_value),
644+
CreateLoad(getInt64Ty(), ret),
645+
"min_cond");
646+
}
647+
CreateCondBr(condition, success_block, merge_block);
648+
649+
SetInsertPoint(success_block);
650+
651+
// ret = cpu_value;
652+
CreateStore(CreateLoad(getInt64Ty(), cpu_value), ret);
653+
654+
CreateBr(merge_block);
655+
656+
SetInsertPoint(merge_block);
609657
}
610658

611659
void IRBuilderBPF::CreateMapUpdateElem(Value *ctx,

src/ast/irbuilderbpf.h

+5-2
Original file line numberDiff line numberDiff line change
@@ -89,10 +89,10 @@ class IRBuilderBPF : public IRBuilder<> {
8989
Value *key,
9090
SizedType &type,
9191
const location &loc);
92-
93-
Value *CreatePerCpuMapSumElems(Value *ctx,
92+
Value *CreatePerCpuMapAggElems(Value *ctx,
9493
Map &map,
9594
Value *key,
95+
const SizedType &type,
9696
const location &loc,
9797
bool is_aot);
9898
void CreateMapUpdateElem(Value *ctx,
@@ -338,6 +338,9 @@ class IRBuilderBPF : public IRBuilder<> {
338338
size_t size,
339339
const location *loc = nullptr);
340340

341+
void createPerCpuSum(AllocaInst *ret, Value *cpu_value);
342+
void createPerCpuMinMax(AllocaInst *ret, Value *cpu_value, bool is_max);
343+
341344
std::map<std::string, StructType *> structs_;
342345
};
343346

src/ast/passes/codegen_llvm.cpp

+5-3
Original file line numberDiff line numberDiff line change
@@ -1425,7 +1425,8 @@ void CodegenLLVM::visit(Map &map)
14251425
const auto &val_type = map_info->second.value_type;
14261426
Value *value;
14271427
if (canAggPerCpuMapElems(val_type, map_info->second.key)) {
1428-
value = b_.CreatePerCpuMapSumElems(ctx_, map, key, map.loc, is_aot_);
1428+
value = b_.CreatePerCpuMapAggElems(
1429+
ctx_, map, key, val_type, map.loc, is_aot_);
14291430
} else {
14301431
value = b_.CreateMapLookupElem(ctx_, map, key, map.loc);
14311432
}
@@ -4217,13 +4218,14 @@ Function *CodegenLLVM::createForEachMapCallback(
42174218
auto &val_type = decl.type.GetField(1).type;
42184219
Value *val = callback->getArg(2);
42194220

4220-
auto map_val_type = map_info->second.value_type;
4221+
const auto &map_val_type = map_info->second.value_type;
42214222
if (canAggPerCpuMapElems(map_val_type, map_info->second.key)) {
42224223
AllocaInst *key_ptr = b_.CreateAllocaBPF(b_.GetType(key_type),
42234224
"lookup_key");
42244225
b_.CreateStore(key, key_ptr);
42254226

4226-
val = b_.CreatePerCpuMapSumElems(ctx_, map, key_ptr, map.loc, is_aot_);
4227+
val = b_.CreatePerCpuMapAggElems(
4228+
ctx_, map, key_ptr, map_val_type, map.loc, is_aot_);
42274229
} else if (!onStack(val_type)) {
42284230
val = b_.CreateLoad(b_.GetType(val_type), val, "val");
42294231
}

src/types.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ class SizedType {
414414
}
415415
bool IsCastableMapTy() const
416416
{
417-
return type_ == Type::count || type_ == Type::sum;
417+
return type_ == Type::count || type_ == Type::sum || type_ == Type::max ||
418+
type_ == Type::min;
418419
}
419420

420421
friend std::ostream &operator<<(std::ostream &, const SizedType &);

tests/codegen/llvm/count_cast.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ entry:
2121
%key = alloca i32, align 4
2222
%print_integer_8_t = alloca %print_integer_8_t, align 8
2323
%i = alloca i32, align 4
24-
%sum = alloca i64, align 8
24+
%ret = alloca i64, align 8
2525
%"@x_key1" = alloca i64, align 8
2626
%initial_value = alloca i64, align 8
2727
%lookup_elem_val = alloca i64, align 8
@@ -59,12 +59,12 @@ lookup_merge: ; preds = %lookup_failure, %lo
5959
%9 = bitcast i64* %"@x_key1" to i8*
6060
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %9)
6161
store i64 0, i64* %"@x_key1", align 8
62-
%10 = bitcast i64* %sum to i8*
62+
%10 = bitcast i64* %ret to i8*
6363
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %10)
6464
%11 = bitcast i32* %i to i8*
6565
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %11)
6666
store i32 0, i32* %i, align 4
67-
store i64 0, i64* %sum, align 8
67+
store i64 0, i64* %ret, align 8
6868
br label %while_cond
6969

7070
if_body: ; preds = %while_end
@@ -100,8 +100,8 @@ while_body: ; preds = %while_cond
100100
while_end: ; preds = %error_failure, %error_success, %while_cond
101101
%20 = bitcast i32* %i to i8*
102102
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %20)
103-
%21 = load i64, i64* %sum, align 8
104-
%22 = bitcast i64* %sum to i8*
103+
%21 = load i64, i64* %ret, align 8
104+
%22 = bitcast i64* %ret to i8*
105105
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %22)
106106
%23 = bitcast i64* %"@x_key1" to i8*
107107
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %23)
@@ -112,10 +112,10 @@ while_end: ; preds = %error_failure, %err
112112

113113
lookup_success2: ; preds = %while_body
114114
%cast5 = bitcast i8* %lookup_percpu_elem to i64*
115-
%26 = load i64, i64* %sum, align 8
115+
%26 = load i64, i64* %ret, align 8
116116
%27 = load i64, i64* %cast5, align 8
117117
%28 = add i64 %27, %26
118-
store i64 %28, i64* %sum, align 8
118+
store i64 %28, i64* %ret, align 8
119119
%29 = load i32, i32* %i, align 4
120120
%30 = add i32 %29, 1
121121
store i32 %30, i32* %i, align 4

tests/codegen/llvm/count_cast_loop.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -68,18 +68,18 @@ define internal i64 @map_for_each_cb(i8* %0, i8* %1, i8* %2, i8* %3) section ".t
6868
%tuple = alloca %"unsigned int64_count__tuple_t", align 8
6969
%"$kv" = alloca %"unsigned int64_count__tuple_t", align 8
7070
%i = alloca i32, align 4
71-
%sum = alloca i64, align 8
71+
%ret = alloca i64, align 8
7272
%lookup_key = alloca i64, align 8
7373
%key = load i64, i8* %1, align 8
7474
%5 = bitcast i64* %lookup_key to i8*
7575
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %5)
7676
store i64 %key, i64* %lookup_key, align 8
77-
%6 = bitcast i64* %sum to i8*
77+
%6 = bitcast i64* %ret to i8*
7878
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %6)
7979
%7 = bitcast i32* %i to i8*
8080
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %7)
8181
store i32 0, i32* %i, align 4
82-
store i64 0, i64* %sum, align 8
82+
store i64 0, i64* %ret, align 8
8383
br label %while_cond
8484

8585
while_cond: ; preds = %lookup_success, %4
@@ -96,8 +96,8 @@ while_body: ; preds = %while_cond
9696
while_end: ; preds = %error_failure, %error_success, %while_cond
9797
%10 = bitcast i32* %i to i8*
9898
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %10)
99-
%11 = load i64, i64* %sum, align 8
100-
%12 = bitcast i64* %sum to i8*
99+
%11 = load i64, i64* %ret, align 8
100+
%12 = bitcast i64* %ret to i8*
101101
call void @llvm.lifetime.end.p0i8(i64 -1, i8* %12)
102102
%13 = bitcast %"unsigned int64_count__tuple_t"* %"$kv" to i8*
103103
call void @llvm.lifetime.start.p0i8(i64 -1, i8* %13)
@@ -137,10 +137,10 @@ while_end: ; preds = %error_failure, %err
137137

138138
lookup_success: ; preds = %while_body
139139
%cast = bitcast i8* %lookup_percpu_elem to i64*
140-
%32 = load i64, i64* %sum, align 8
140+
%32 = load i64, i64* %ret, align 8
141141
%33 = load i64, i64* %cast, align 8
142142
%34 = add i64 %33, %32
143-
store i64 %34, i64* %sum, align 8
143+
store i64 %34, i64* %ret, align 8
144144
%35 = load i32, i32* %i, align 4
145145
%36 = add i32 %35, 1
146146
store i32 %36, i32* %i, align 4

0 commit comments

Comments
 (0)