Skip to content

Commit bcf4dc1

Browse files
committed
LLVM 20: Add vendored profdata/profgen tools and FileCheck util
1 parent 11babd1 commit bcf4dc1

17 files changed

+11010
-1
lines changed

tools/CMakeLists.txt

+9-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,15 @@ if(LDC_BUNDLE_LLVM_TOOLS)
3838
COMPILE_FLAGS "${LLVM_CXXFLAGS} ${LDC_CXXFLAGS}"
3939
LINK_FLAGS "${SANITIZE_LDFLAGS} ${FULLY_STATIC_LDFLAG}"
4040
)
41-
target_link_libraries(ldc-profdata ${LLVM_LIBRARIES} ${CMAKE_DL_LIBS} ${LLVM_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
41+
set(link_llvmdebuginfod)
42+
if(LLVM_VERSION_MAJOR GREATER 19)
43+
if(MSVC)
44+
set(link_llvmdebuginfod "${LLVM_LIBRARY_DIRS}/LLVMDebuginfod.lib")
45+
else()
46+
set(link_llvmdebuginfod -lLLVMDebuginfod)
47+
endif()
48+
endif()
49+
target_link_libraries(ldc-profdata ${link_llvmdebuginfod} ${LLVM_LIBRARIES} ${CMAKE_DL_LIBS} ${LLVM_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT})
4250
install(TARGETS ldc-profdata DESTINATION ${CMAKE_INSTALL_PREFIX}/bin)
4351

4452
# Set path to executable, used by the lit testsuite.

tools/ldc-profdata/llvm-profdata-20.1.cpp

+3,483
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
2+
set(LLVM_LINK_COMPONENTS
3+
AllTargetsDescs
4+
AllTargetsDisassemblers
5+
AllTargetsInfos
6+
DebugInfoDWARF
7+
Core
8+
MC
9+
IPO
10+
MCDisassembler
11+
Object
12+
ProfileData
13+
Support
14+
Symbolize
15+
TargetParser
16+
)
17+
18+
add_llvm_tool(llvm-profgen
19+
llvm-profgen.cpp
20+
PerfReader.cpp
21+
CSPreInliner.cpp
22+
ProfiledBinary.cpp
23+
ProfileGenerator.cpp
24+
MissingFrameInferrer.cpp
25+
)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,316 @@
1+
//===-- CSPreInliner.cpp - Profile guided preinliner -------------- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "CSPreInliner.h"
10+
#include "ProfiledBinary.h"
11+
#include "llvm/ADT/SCCIterator.h"
12+
#include "llvm/ADT/Statistic.h"
13+
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
14+
#include "llvm/Transforms/IPO/SampleProfile.h"
15+
#include <cstdint>
16+
#include <queue>
17+
18+
#define DEBUG_TYPE "cs-preinliner"
19+
20+
using namespace llvm;
21+
using namespace sampleprof;
22+
23+
STATISTIC(PreInlNumCSInlined,
24+
"Number of functions inlined with context sensitive profile");
25+
STATISTIC(PreInlNumCSNotInlined,
26+
"Number of functions not inlined with context sensitive profile");
27+
STATISTIC(PreInlNumCSInlinedHitMinLimit,
28+
"Number of functions with FDO inline stopped due to min size limit");
29+
STATISTIC(PreInlNumCSInlinedHitMaxLimit,
30+
"Number of functions with FDO inline stopped due to max size limit");
31+
STATISTIC(
32+
PreInlNumCSInlinedHitGrowthLimit,
33+
"Number of functions with FDO inline stopped due to growth size limit");
34+
35+
// The switches specify inline thresholds used in SampleProfileLoader inlining.
36+
// TODO: the actual threshold to be tuned here because the size here is based
37+
// on machine code not LLVM IR.
38+
namespace llvm {
39+
cl::opt<bool> EnableCSPreInliner(
40+
"csspgo-preinliner", cl::Hidden, cl::init(true),
41+
cl::desc("Run a global pre-inliner to merge context profile based on "
42+
"estimated global top-down inline decisions"));
43+
44+
cl::opt<bool> UseContextCostForPreInliner(
45+
"use-context-cost-for-preinliner", cl::Hidden, cl::init(true),
46+
cl::desc("Use context-sensitive byte size cost for preinliner decisions"));
47+
} // namespace llvm
48+
49+
static cl::opt<bool> SamplePreInlineReplay(
50+
"csspgo-replay-preinline", cl::Hidden, cl::init(false),
51+
cl::desc(
52+
"Replay previous inlining and adjust context profile accordingly"));
53+
54+
static cl::opt<int> CSPreinlMultiplierForPrevInl(
55+
"csspgo-preinliner-multiplier-for-previous-inlining", cl::Hidden,
56+
cl::init(100),
57+
cl::desc(
58+
"Multiplier to bump up callsite threshold for previous inlining."));
59+
60+
CSPreInliner::CSPreInliner(SampleContextTracker &Tracker,
61+
ProfiledBinary &Binary, ProfileSummary *Summary)
62+
: UseContextCost(UseContextCostForPreInliner),
63+
// TODO: Pass in a guid-to-name map in order for
64+
// ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes
65+
// as their profile context.
66+
ContextTracker(Tracker), Binary(Binary), Summary(Summary) {
67+
// Set default preinliner hot/cold call site threshold tuned with CSSPGO.
68+
// for good performance with reasonable profile size.
69+
if (!SampleHotCallSiteThreshold.getNumOccurrences())
70+
SampleHotCallSiteThreshold = 1500;
71+
if (!SampleColdCallSiteThreshold.getNumOccurrences())
72+
SampleColdCallSiteThreshold = 0;
73+
if (!ProfileInlineLimitMax.getNumOccurrences())
74+
ProfileInlineLimitMax = 50000;
75+
}
76+
77+
std::vector<FunctionId> CSPreInliner::buildTopDownOrder() {
78+
std::vector<FunctionId> Order;
79+
// Trim cold edges to get a more stable call graph. This allows for a more
80+
// stable top-down order which in turns helps the stablity of the generated
81+
// profile from run to run.
82+
uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
83+
(Summary->getDetailedSummary()));
84+
ProfiledCallGraph ProfiledCG(ContextTracker, ColdCountThreshold);
85+
86+
// Now that we have a profiled call graph, construct top-down order
87+
// by building up SCC and reversing SCC order.
88+
scc_iterator<ProfiledCallGraph *> I = scc_begin(&ProfiledCG);
89+
while (!I.isAtEnd()) {
90+
auto Range = *I;
91+
if (SortProfiledSCC) {
92+
// Sort nodes in one SCC based on callsite hotness.
93+
scc_member_iterator<ProfiledCallGraph *> SI(*I);
94+
Range = *SI;
95+
}
96+
for (auto *Node : Range) {
97+
if (Node != ProfiledCG.getEntryNode())
98+
Order.push_back(Node->Name);
99+
}
100+
++I;
101+
}
102+
std::reverse(Order.begin(), Order.end());
103+
104+
return Order;
105+
}
106+
107+
bool CSPreInliner::getInlineCandidates(ProfiledCandidateQueue &CQueue,
108+
const FunctionSamples *CallerSamples) {
109+
assert(CallerSamples && "Expect non-null caller samples");
110+
111+
// Ideally we want to consider everything a function calls, but as far as
112+
// context profile is concerned, only those frames that are children of
113+
// current one in the trie is relavent. So we walk the trie instead of call
114+
// targets from function profile.
115+
ContextTrieNode *CallerNode =
116+
ContextTracker.getContextNodeForProfile(CallerSamples);
117+
118+
bool HasNewCandidate = false;
119+
for (auto &Child : CallerNode->getAllChildContext()) {
120+
ContextTrieNode *CalleeNode = &Child.second;
121+
FunctionSamples *CalleeSamples = CalleeNode->getFunctionSamples();
122+
if (!CalleeSamples)
123+
continue;
124+
125+
// Call site count is more reliable, so we look up the corresponding call
126+
// target profile in caller's context profile to retrieve call site count.
127+
uint64_t CalleeEntryCount = CalleeSamples->getHeadSamplesEstimate();
128+
uint64_t CallsiteCount = 0;
129+
LineLocation Callsite = CalleeNode->getCallSiteLoc();
130+
if (auto CallTargets = CallerSamples->findCallTargetMapAt(Callsite)) {
131+
auto It = CallTargets->find(CalleeSamples->getFunction());
132+
if (It != CallTargets->end())
133+
CallsiteCount = It->second;
134+
}
135+
136+
// TODO: call site and callee entry count should be mostly consistent, add
137+
// check for that.
138+
HasNewCandidate = true;
139+
uint32_t CalleeSize = getFuncSize(CalleeNode);
140+
CQueue.emplace(CalleeSamples, std::max(CallsiteCount, CalleeEntryCount),
141+
CalleeSize);
142+
}
143+
144+
return HasNewCandidate;
145+
}
146+
147+
uint32_t CSPreInliner::getFuncSize(const ContextTrieNode *ContextNode) {
148+
if (UseContextCost)
149+
return Binary.getFuncSizeForContext(ContextNode);
150+
151+
return ContextNode->getFunctionSamples()->getBodySamples().size();
152+
}
153+
154+
bool CSPreInliner::shouldInline(ProfiledInlineCandidate &Candidate) {
155+
bool WasInlined =
156+
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
157+
// If replay inline is requested, simply follow the inline decision of the
158+
// profiled binary.
159+
if (SamplePreInlineReplay)
160+
return WasInlined;
161+
162+
unsigned int SampleThreshold = SampleColdCallSiteThreshold;
163+
uint64_t ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(
164+
(Summary->getDetailedSummary()));
165+
166+
if (Candidate.CallsiteCount <= ColdCountThreshold)
167+
SampleThreshold = SampleColdCallSiteThreshold;
168+
else {
169+
// Linearly adjust threshold based on normalized hotness, i.e, a value in
170+
// [0,1]. Use 10% cutoff instead of the max count as the normalization
171+
// upperbound for stability.
172+
double NormalizationUpperBound =
173+
ProfileSummaryBuilder::getEntryForPercentile(
174+
Summary->getDetailedSummary(), 100000 /* 10% */)
175+
.MinCount;
176+
double NormalizationLowerBound = ColdCountThreshold;
177+
double NormalizedHotness =
178+
(Candidate.CallsiteCount - NormalizationLowerBound) /
179+
(NormalizationUpperBound - NormalizationLowerBound);
180+
if (NormalizedHotness > 1.0)
181+
NormalizedHotness = 1.0;
182+
// Add 1 to ensure hot callsites get a non-zero threshold, which could
183+
// happen when SampleColdCallSiteThreshold is 0. This is when we do not
184+
// want any inlining for cold callsites.
185+
SampleThreshold = SampleHotCallSiteThreshold * NormalizedHotness * 100 +
186+
SampleColdCallSiteThreshold + 1;
187+
// Bump up the threshold to favor previous compiler inline decision. The
188+
// compiler has more insight and knowledge about functions based on their IR
189+
// and attribures and should be able to make a more reasonable inline
190+
// decision.
191+
if (WasInlined)
192+
SampleThreshold *= CSPreinlMultiplierForPrevInl;
193+
}
194+
195+
return (Candidate.SizeCost < SampleThreshold);
196+
}
197+
198+
void CSPreInliner::processFunction(const FunctionId Name) {
199+
FunctionSamples *FSamples = ContextTracker.getBaseSamplesFor(Name);
200+
if (!FSamples)
201+
return;
202+
203+
unsigned FuncSize =
204+
getFuncSize(ContextTracker.getContextNodeForProfile(FSamples));
205+
unsigned FuncFinalSize = FuncSize;
206+
unsigned SizeLimit = FuncSize * ProfileInlineGrowthLimit;
207+
SizeLimit = std::min(SizeLimit, (unsigned)ProfileInlineLimitMax);
208+
SizeLimit = std::max(SizeLimit, (unsigned)ProfileInlineLimitMin);
209+
210+
LLVM_DEBUG(dbgs() << "Process " << Name
211+
<< " for context-sensitive pre-inlining (pre-inline size: "
212+
<< FuncSize << ", size limit: " << SizeLimit << ")\n");
213+
214+
ProfiledCandidateQueue CQueue;
215+
getInlineCandidates(CQueue, FSamples);
216+
217+
while (!CQueue.empty() && FuncFinalSize < SizeLimit) {
218+
ProfiledInlineCandidate Candidate = CQueue.top();
219+
CQueue.pop();
220+
bool ShouldInline = false;
221+
if ((ShouldInline = shouldInline(Candidate))) {
222+
// We mark context as inlined as the corresponding context profile
223+
// won't be merged into that function's base profile.
224+
++PreInlNumCSInlined;
225+
ContextTracker.markContextSamplesInlined(Candidate.CalleeSamples);
226+
Candidate.CalleeSamples->getContext().setAttribute(
227+
ContextShouldBeInlined);
228+
FuncFinalSize += Candidate.SizeCost;
229+
getInlineCandidates(CQueue, Candidate.CalleeSamples);
230+
} else {
231+
++PreInlNumCSNotInlined;
232+
}
233+
LLVM_DEBUG(
234+
dbgs() << (ShouldInline ? " Inlined" : " Outlined")
235+
<< " context profile for: "
236+
<< ContextTracker.getContextString(*Candidate.CalleeSamples)
237+
<< " (callee size: " << Candidate.SizeCost
238+
<< ", call count:" << Candidate.CallsiteCount << ")\n");
239+
}
240+
241+
if (!CQueue.empty()) {
242+
if (SizeLimit == (unsigned)ProfileInlineLimitMax)
243+
++PreInlNumCSInlinedHitMaxLimit;
244+
else if (SizeLimit == (unsigned)ProfileInlineLimitMin)
245+
++PreInlNumCSInlinedHitMinLimit;
246+
else
247+
++PreInlNumCSInlinedHitGrowthLimit;
248+
}
249+
250+
LLVM_DEBUG({
251+
if (!CQueue.empty())
252+
dbgs() << " Inline candidates ignored due to size limit (inliner "
253+
"original size: "
254+
<< FuncSize << ", inliner final size: " << FuncFinalSize
255+
<< ", size limit: " << SizeLimit << ")\n";
256+
257+
while (!CQueue.empty()) {
258+
ProfiledInlineCandidate Candidate = CQueue.top();
259+
CQueue.pop();
260+
bool WasInlined =
261+
Candidate.CalleeSamples->getContext().hasAttribute(ContextWasInlined);
262+
dbgs() << " "
263+
<< ContextTracker.getContextString(*Candidate.CalleeSamples)
264+
<< " (candidate size:" << Candidate.SizeCost
265+
<< ", call count: " << Candidate.CallsiteCount << ", previously "
266+
<< (WasInlined ? "inlined)\n" : "not inlined)\n");
267+
}
268+
});
269+
}
270+
271+
void CSPreInliner::run() {
272+
#ifndef NDEBUG
273+
auto printProfileNames = [](SampleContextTracker &ContextTracker,
274+
bool IsInput) {
275+
uint32_t Size = 0;
276+
for (auto *Node : ContextTracker) {
277+
FunctionSamples *FSamples = Node->getFunctionSamples();
278+
if (FSamples) {
279+
Size++;
280+
dbgs() << " [" << ContextTracker.getContextString(Node) << "] "
281+
<< FSamples->getTotalSamples() << ":"
282+
<< FSamples->getHeadSamples() << "\n";
283+
}
284+
}
285+
dbgs() << (IsInput ? "Input" : "Output") << " context-sensitive profiles ("
286+
<< Size << " total):\n";
287+
};
288+
#endif
289+
290+
LLVM_DEBUG(printProfileNames(ContextTracker, true));
291+
292+
// Execute global pre-inliner to estimate a global top-down inline
293+
// decision and merge profiles accordingly. This helps with profile
294+
// merge for ThinLTO otherwise we won't be able to merge profiles back
295+
// to base profile across module/thin-backend boundaries.
296+
// It also helps better compress context profile to control profile
297+
// size, as we now only need context profile for functions going to
298+
// be inlined.
299+
for (FunctionId FuncName : buildTopDownOrder()) {
300+
processFunction(FuncName);
301+
}
302+
303+
// Not inlined context profiles are merged into its base, so we can
304+
// trim out such profiles from the output.
305+
for (auto *Node : ContextTracker) {
306+
FunctionSamples *FProfile = Node->getFunctionSamples();
307+
if (FProfile &&
308+
(Node->getParentContext() != &ContextTracker.getRootContext() &&
309+
!FProfile->getContext().hasState(InlinedContext))) {
310+
Node->setFunctionSamples(nullptr);
311+
}
312+
}
313+
FunctionSamples::ProfileIsPreInlined = true;
314+
315+
LLVM_DEBUG(printProfileNames(ContextTracker, false));
316+
}

0 commit comments

Comments
 (0)