Skip to content

Commit db38ba5

Browse files
twd2memfrob
authored and
memfrob
committed
[Bitcode] Include indirect users of BlockAddresses in bitcode
The original fix (commit 23ec5782c3cc) of llvm/llvm-project#52787 only adds `Function`s that have `Instruction`s that directly use `BlockAddress`es into the bitcode (`FUNC_CODE_BLOCKADDR_USERS`). However, in either @rickyz's original reproducing code: ``` void f(long); __attribute__((noinline)) static void fun(long x) { f(x + 1); } void repro(void) { fun(({ label: (long)&&label; })); } ``` ``` ... define dso_local void @repro() #0 { entry: br label %label label: ; preds = %entry tail call fastcc void @fun() ret void } define internal fastcc void @fun() unnamed_addr #1 { entry: tail call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 1)) #3 ret void } ... ``` or the xfs and overlayfs in the Linux kernel, `BlockAddress`es (e.g., `i8* blockaddress(@repro, %label)`) may first compose `ConstantExpr`s (e.g., `i64 ptrtoint (i8* blockaddress(@repro, %label) to i64)`) and then used by `Instruction`s. This case is not handled by the original fix. This patch adds *indirect* users of `BlockAddress`es, i.e., the `Instruction`s using some `Constant`s which further use the `BlockAddress`es, into the bitcode as well, by doing depth-first searches. Fixes: llvm/llvm-project#52787 Fixes: 23ec5782c3cc ("[Bitcode] materialize Functions early when BlockAddress taken") Reviewed By: nickdesaulniers Differential Revision: https://reviews.llvm.org/D124878
1 parent c669827 commit db38ba5

File tree

5 files changed

+174
-6
lines changed

5 files changed

+174
-6
lines changed

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "llvm/ADT/None.h"
2020
#include "llvm/ADT/Optional.h"
2121
#include "llvm/ADT/STLExtras.h"
22+
#include "llvm/ADT/SetVector.h"
2223
#include "llvm/ADT/SmallPtrSet.h"
2324
#include "llvm/ADT/SmallString.h"
2425
#include "llvm/ADT/SmallVector.h"
@@ -3365,7 +3366,7 @@ void ModuleBitcodeWriter::writeFunction(
33653366
bool NeedsMetadataAttachment = F.hasMetadata();
33663367

33673368
DILocation *LastDL = nullptr;
3368-
SmallPtrSet<Function *, 4> BlockAddressUsers;
3369+
SmallSetVector<Function *, 4> BlockAddressUsers;
33693370

33703371
// Finally, emit all the instructions, in order.
33713372
for (const BasicBlock &BB : F) {
@@ -3401,11 +3402,24 @@ void ModuleBitcodeWriter::writeFunction(
34013402
}
34023403

34033404
if (BlockAddress *BA = BlockAddress::lookup(&BB)) {
3404-
for (User *U : BA->users()) {
3405-
if (auto *I = dyn_cast<Instruction>(U)) {
3406-
Function *P = I->getParent()->getParent();
3407-
if (P != &F)
3408-
BlockAddressUsers.insert(P);
3405+
SmallVector<Value *, 16> BlockAddressUsersStack { BA };
3406+
SmallPtrSet<Value *, 16> BlockAddressUsersVisited { BA };
3407+
3408+
while (!BlockAddressUsersStack.empty()) {
3409+
Value *V = BlockAddressUsersStack.pop_back_val();
3410+
3411+
for (User *U : V->users()) {
3412+
if ((isa<ConstantAggregate>(U) || isa<ConstantExpr>(U)) &&
3413+
!BlockAddressUsersVisited.contains(U)) {
3414+
BlockAddressUsersStack.push_back(U);
3415+
BlockAddressUsersVisited.insert(U);
3416+
}
3417+
3418+
if (auto *I = dyn_cast<Instruction>(U)) {
3419+
Function *P = I->getParent()->getParent();
3420+
if (P != &F)
3421+
BlockAddressUsers.insert(P);
3422+
}
34093423
}
34103424
}
34113425
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
3+
; RUN: llvm-dis %t.bc
4+
5+
; There's a curious case where blockaddress constants may refer to functions
6+
; outside of the function they're used in. There's a special bitcode function
7+
; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
8+
9+
; The intent of this test is two-fold:
10+
; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
11+
; @repro, since @fun and @fun2 both refer to @repro via blockaddress
12+
; constants.
13+
; 2. Ensure we can round-trip serializing+desearlizing such case.
14+
15+
; CHECK: <FUNCTION_BLOCK
16+
; CHECK: <BLOCKADDR_USERS op0=2 op1=1
17+
; CHECK: <FUNCTION_BLOCK
18+
; CHECK: <FUNCTION_BLOCK
19+
20+
%struct.ptrs = type { i8*, i8* }
21+
22+
define void @repro() {
23+
br label %label
24+
25+
label:
26+
call void @fun()
27+
ret void
28+
}
29+
30+
define void @fun() noinline {
31+
call void @f(%struct.ptrs { i8* blockaddress(@repro, %label), i8* blockaddress(@repro, %label) })
32+
ret void
33+
}
34+
35+
define void @fun2() noinline {
36+
call void @f(%struct.ptrs { i8* blockaddress(@repro, %label), i8* blockaddress(@repro, %label) })
37+
ret void
38+
}
39+
40+
declare void @f(%struct.ptrs)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
3+
; RUN: llvm-dis %t.bc
4+
5+
; There's a curious case where blockaddress constants may refer to functions
6+
; outside of the function they're used in. There's a special bitcode function
7+
; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
8+
9+
; The intent of this test is two-fold:
10+
; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
11+
; @repro, since @fun and @fun2 both refer to @repro via blockaddress
12+
; constants.
13+
; 2. Ensure we can round-trip serializing+desearlizing such case.
14+
15+
; CHECK: <FUNCTION_BLOCK
16+
; CHECK: <BLOCKADDR_USERS op0=2 op1=1
17+
; CHECK: <FUNCTION_BLOCK
18+
; CHECK: <FUNCTION_BLOCK
19+
20+
define void @repro() {
21+
br label %label
22+
23+
label:
24+
call void @fun()
25+
ret void
26+
}
27+
28+
define void @fun() noinline {
29+
call void @f(i64 ptrtoint (i8* blockaddress(@repro, %label) to i64))
30+
ret void
31+
}
32+
33+
define void @fun2() noinline {
34+
call void @f(i64 ptrtoint (i8* blockaddress(@repro, %label) to i64))
35+
ret void
36+
}
37+
38+
declare void @f(i64)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
3+
; RUN: llvm-dis %t.bc
4+
5+
; There's a curious case where blockaddress constants may refer to functions
6+
; outside of the function they're used in. There's a special bitcode function
7+
; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
8+
9+
; The intent of this test is two-fold:
10+
; 1. Ensure we do not produce BLOCKADDR_USERS bitcode function code on the first
11+
; fn, @repro, by accident, when @fun and @fun2 use a global value, @foo,
12+
; which is initialized to @repro's blockaddress constants.
13+
; 2. Ensure we can round-trip serializing+desearlizing such case.
14+
15+
; CHECK: <FUNCTION_BLOCK
16+
; CHECK-NOT: <BLOCKADDR_USERS
17+
18+
@foo = global i8* blockaddress(@repro, %label)
19+
20+
define void @repro() {
21+
br label %label
22+
23+
label:
24+
call void @fun()
25+
ret void
26+
}
27+
28+
define void @fun() noinline {
29+
call void @f(i8** @foo)
30+
ret void
31+
}
32+
33+
define void @fun2() noinline {
34+
call void @f(i8** @foo)
35+
ret void
36+
}
37+
38+
declare void @f(i8**)
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
; RUN: llvm-as %s -o %t.bc
2+
; RUN: llvm-bcanalyzer -dump %t.bc | FileCheck %s
3+
; RUN: llvm-dis %t.bc
4+
5+
; There's a curious case where blockaddress constants may refer to functions
6+
; outside of the function they're used in. There's a special bitcode function
7+
; code, FUNC_CODE_BLOCKADDR_USERS, used to signify that this is the case.
8+
9+
; The intent of this test is two-fold:
10+
; 1. Ensure we produce BLOCKADDR_USERS bitcode function code on the first fn,
11+
; @repro, since @fun and @fun2 both refer to @repro via blockaddress
12+
; constants.
13+
; 2. Ensure we can round-trip serializing+desearlizing such case.
14+
15+
; CHECK: <FUNCTION_BLOCK
16+
; CHECK: <BLOCKADDR_USERS op0=1 op1=2
17+
; CHECK: <FUNCTION_BLOCK
18+
; CHECK: <FUNCTION_BLOCK
19+
20+
define void @repro() {
21+
br label %label
22+
23+
label:
24+
call void @fun()
25+
ret void
26+
}
27+
28+
define void @fun() noinline {
29+
call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 1))
30+
ret void
31+
}
32+
33+
define void @fun2() noinline {
34+
call void @f(i64 add (i64 ptrtoint (i8* blockaddress(@repro, %label) to i64), i64 2))
35+
ret void
36+
}
37+
38+
declare void @f(i64)

0 commit comments

Comments
 (0)