diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 9e75af0ac8f6bf..d977f6c692efa1 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -227,13 +227,13 @@ jobs: timeoutInMinutes: 300 ${{ else }}: timeoutInMinutes: 200 - ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'pgo', 'jit-cfg') }}: + ${{ if in(parameters.testGroup, 'outerloop', 'jit-experimental', 'jit-cfg') }}: timeoutInMinutes: 270 ${{ if in(parameters.testGroup, 'gc-longrunning', 'gc-simulator') }}: timeoutInMinutes: 480 ${{ if in(parameters.testGroup, 'jitstress', 'jitstress-isas-arm', 'jitstressregs-x86', 'jitstressregs', 'jitstress2-jitstressregs', 'gcstress0x3-gcstress0xc', 'ilasm') }}: timeoutInMinutes: 390 - ${{ if in(parameters.testGroup, 'gcstress-extra', 'r2r-extra', 'clrinterpreter') }}: + ${{ if in(parameters.testGroup, 'gcstress-extra', 'r2r-extra', 'clrinterpreter', 'pgo') }}: timeoutInMinutes: 510 ${{ if eq(parameters.testGroup, 'jitstress-isas-x86') }}: timeoutInMinutes: 960 @@ -397,7 +397,7 @@ jobs: ${{ if eq(parameters.runtimeFlavor, 'mono') }}: # tiered compilation isn't done on mono yet scenarios: - - normal + - normal ${{ elseif eq(variables['Build.Reason'], 'PullRequest') }}: scenarios: - no_tiered_compilation @@ -545,7 +545,9 @@ jobs: - defaultpgo - dynamicpgo - fullpgo + - fullpgo_methodprofiling - fullpgo_random_gdv + - fullpgo_random_gdv_methodprofiling_only - fullpgo_random_edge - fullpgo_random_gdv_edge ${{ if in(parameters.testGroup, 'gc-longrunning') }}: @@ -568,7 +570,6 @@ jobs: - jitelthookenabled_tiered ${{ if in(parameters.testGroup, 'jit-experimental') }}: scenarios: - - jitosr - jitosr_stress - jitosr_pgo - jitosr_stress_random diff --git a/eng/pipelines/coreclr/libraries-pgo.yml b/eng/pipelines/coreclr/libraries-pgo.yml index 0914451b55ec66..0a3346141744f9 100644 --- a/eng/pipelines/coreclr/libraries-pgo.yml +++ b/eng/pipelines/coreclr/libraries-pgo.yml @@ -47,7 +47,7 @@ jobs: helixQueueGroup: libraries helixQueuesTemplate: /eng/pipelines/coreclr/templates/helix-queues-setup.yml jobParameters: - timeoutInMinutes: 150 + timeoutInMinutes: 600 testScope: innerloop liveRuntimeBuildConfig: checked dependsOnTestBuildConfiguration: Release diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml index 17e3a63875d176..c6fbe831a4d9bd 100644 --- a/eng/pipelines/libraries/run-test-job.yml +++ b/eng/pipelines/libraries/run-test-job.yml @@ -173,10 +173,11 @@ jobs: - defaultpgo - dynamicpgo - fullpgo + - fullpgo_methodprofiling - fullpgo_random_gdv + - fullpgo_random_gdv_methodprofiling_only - fullpgo_random_edge - fullpgo_random_gdv_edge - - jitosr - jitosr_stress - jitosr_stress_random - jitosr_pgo diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 8cb56a69a17038..91c5734c905881 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -640,9 +640,14 @@ enum CorInfoHelpFunc CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame CORINFO_HELP_PATCHPOINT, // Notify runtime that code has reached a patchpoint + CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. 
+ CORINFO_HELP_CLASSPROFILE32, // Update 32-bit class profile for a call site CORINFO_HELP_CLASSPROFILE64, // Update 64-bit class profile for a call site - CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. + CORINFO_HELP_DELEGATEPROFILE32, // Update 32-bit method profile for a delegate call site + CORINFO_HELP_DELEGATEPROFILE64, // Update 64-bit method profile for a delegate call site + CORINFO_HELP_VTABLEPROFILE32, // Update 32-bit method profile for a vtable call site + CORINFO_HELP_VTABLEPROFILE64, // Update 64-bit method profile for a vtable call site CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer diff --git a/src/coreclr/inc/corjit.h b/src/coreclr/inc/corjit.h index 54aaded8f90187..380db270e1cbf5 100644 --- a/src/coreclr/inc/corjit.h +++ b/src/coreclr/inc/corjit.h @@ -330,7 +330,8 @@ class ICorJitInfo : public ICorDynamicInfo // Data structure for a single class probe using 32-bit count. // - // CLASS_FLAG and INTERFACE_FLAG are placed into the Other field in the schema + // CLASS_FLAG, INTERFACE_FLAG and DELEGATE_FLAG are placed into the Other field in the schema. + // If CLASS_FLAG is set the handle table consists of type handles, and otherwise method handles. // // Count is the number of times a call was made at that call site. // @@ -338,8 +339,8 @@ class ICorJitInfo : public ICorDynamicInfo // // SAMPLE_INTERVAL must be >= SIZE. SAMPLE_INTERVAL / SIZE // gives the average number of calls between table updates. - // - struct ClassProfile32 + // + struct HandleHistogram32 { enum { @@ -347,17 +348,18 @@ class ICorJitInfo : public ICorDynamicInfo SAMPLE_INTERVAL = 32, CLASS_FLAG = 0x80000000, INTERFACE_FLAG = 0x40000000, - OFFSET_MASK = 0x3FFFFFFF + DELEGATE_FLAG = 0x20000000, + OFFSET_MASK = 0x0FFFFFFF }; uint32_t Count; - CORINFO_CLASS_HANDLE ClassTable[SIZE]; + void* HandleTable[SIZE]; }; - struct ClassProfile64 + struct HandleHistogram64 { uint64_t Count; - CORINFO_CLASS_HANDLE ClassTable[ClassProfile32::SIZE]; + void* HandleTable[HandleHistogram32::SIZE]; }; enum class PgoInstrumentationKind @@ -387,7 +389,7 @@ class ICorJitInfo : public ICorDynamicInfo Done = None, // All instrumentation schemas must end with a record which is "Done" BasicBlockIntCount = (DescriptorMin * 1) | FourByte, // basic block counter using unsigned 4 byte int BasicBlockLongCount = (DescriptorMin * 1) | EightByte, // basic block counter using unsigned 8 byte int - HandleHistogramIntCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram. Aligned to match ClassProfile32's alignment. + HandleHistogramIntCount = (DescriptorMin * 2) | FourByte | AlignPointer, // 4 byte counter that is part of a type histogram. Aligned to match HandleHistogram32's alignment. 
HandleHistogramLongCount = (DescriptorMin * 2) | EightByte, // 8 byte counter that is part of a type histogram HandleHistogramTypes = (DescriptorMin * 3) | TypeHandle, // Histogram of type handles HandleHistogramMethods = (DescriptorMin * 3) | MethodHandle, // Histogram of method handles @@ -396,6 +398,7 @@ class ICorJitInfo : public ICorDynamicInfo EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data + GetLikelyMethod = (DescriptorMin * 7) | MethodHandle, // Compressed get likely method data }; struct PgoInstrumentationSchema @@ -418,7 +421,7 @@ class ICorJitInfo : public ICorDynamicInfo Sampling= 6, // PGO data derived from sampling }; -#define DEFAULT_UNKNOWN_TYPEHANDLE 1 +#define DEFAULT_UNKNOWN_HANDLE 1 #define UNKNOWN_HANDLE_MIN 1 #define UNKNOWN_HANDLE_MAX 33 diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 27f75a486cb53c..9a6cbc053e1ce6 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* af5b6632-6fbe-4a2e-82d6-24487a138e4a */ - 0xaf5b6632, - 0x6fbe, - 0x4a2e, - {0x82, 0xd6, 0x24, 0x48, 0x7a, 0x13, 0x8e, 0x4a} +constexpr GUID JITEEVersionIdentifier = { /* f2faa5fc-a1ec-4244-aebb-5597bfd7153a */ + 0xf2faa5fc, + 0xa1ec, + 0x4244, + {0xae, 0xbb, 0x55, 0x97, 0xbf, 0xd7, 0x15, 0x3a} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e40eb4105ee3c0..a500c298978b67 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -328,9 +328,14 @@ #endif JITHELPER(CORINFO_HELP_PATCHPOINT, JIT_Patchpoint, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, JIT_PartialCompilationPatchpoint, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_CLASSPROFILE32, JIT_ClassProfile32, CORINFO_HELP_SIG_REG_ONLY) JITHELPER(CORINFO_HELP_CLASSPROFILE64, JIT_ClassProfile64, CORINFO_HELP_SIG_REG_ONLY) - JITHELPER(CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, JIT_PartialCompilationPatchpoint, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_DELEGATEPROFILE32, JIT_DelegateProfile32, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_DELEGATEPROFILE64, JIT_DelegateProfile64, CORINFO_HELP_SIG_REG_ONLY) + JITHELPER(CORINFO_HELP_VTABLEPROFILE32, JIT_VTableProfile32, CORINFO_HELP_SIG_4_STACK) + JITHELPER(CORINFO_HELP_VTABLEPROFILE64, JIT_VTableProfile64, CORINFO_HELP_SIG_4_STACK) #if defined(TARGET_AMD64) || defined(TARGET_ARM64) JITHELPER(CORINFO_HELP_VALIDATE_INDIRECT_CALL, JIT_ValidateIndirectCall, CORINFO_HELP_SIG_REG_ONLY) diff --git a/src/coreclr/inc/readytorun.h b/src/coreclr/inc/readytorun.h index 76354362d57d83..20db29298cfba1 100644 --- a/src/coreclr/inc/readytorun.h +++ b/src/coreclr/inc/readytorun.h @@ -16,7 +16,7 @@ // Keep these in sync with src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs #define READYTORUN_MAJOR_VERSION 0x0006 -#define READYTORUN_MINOR_VERSION 0x0001 +#define READYTORUN_MINOR_VERSION 0x0002 #define MINIMUM_READYTORUN_MAJOR_VERSION 0x006 diff --git a/src/coreclr/jit/ClrJit.PAL.exports b/src/coreclr/jit/ClrJit.PAL.exports index 2625e98bc421e7..e4e6064db84e89 
100644 --- a/src/coreclr/jit/ClrJit.PAL.exports +++ b/src/coreclr/jit/ClrJit.PAL.exports @@ -1,4 +1,5 @@ getJit jitStartup getLikelyClasses +getLikelyMethods jitBuildString diff --git a/src/coreclr/jit/ClrJit.exports b/src/coreclr/jit/ClrJit.exports index c6a22db4cae403..5430f7b165929d 100644 --- a/src/coreclr/jit/ClrJit.exports +++ b/src/coreclr/jit/ClrJit.exports @@ -5,4 +5,5 @@ EXPORTS getJit jitStartup getLikelyClasses + getLikelyMethods jitBuildString diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 88dcb79794fb87..e9a539a2f35fa6 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -526,32 +526,32 @@ enum BasicBlockFlags : unsigned __int64 #endif // defined(FEATURE_EH_FUNCLETS) && defined(TARGET_ARM) - BBF_BACKWARD_JUMP = MAKE_BBFLAG(24), // BB is surrounded by a backward jump/switch arc - BBF_RETLESS_CALL = MAKE_BBFLAG(25), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired - // BBJ_ALWAYS); see isBBCallAlwaysPair(). - BBF_LOOP_PREHEADER = MAKE_BBFLAG(26), // BB is a loop preheader block - BBF_COLD = MAKE_BBFLAG(27), // BB is cold - - BBF_PROF_WEIGHT = MAKE_BBFLAG(28), // BB weight is computed from profile data - BBF_IS_LIR = MAKE_BBFLAG(29), // Set if the basic block contains LIR (as opposed to HIR) - BBF_KEEP_BBJ_ALWAYS = MAKE_BBFLAG(30), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind - // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the - // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a - // finally. - BBF_CLONED_FINALLY_BEGIN = MAKE_BBFLAG(31), // First block of a cloned finally region - - BBF_CLONED_FINALLY_END = MAKE_BBFLAG(32), // Last block of a cloned finally region - BBF_HAS_CALL = MAKE_BBFLAG(33), // BB contains a call + BBF_BACKWARD_JUMP = MAKE_BBFLAG(24), // BB is surrounded by a backward jump/switch arc + BBF_RETLESS_CALL = MAKE_BBFLAG(25), // BBJ_CALLFINALLY that will never return (and therefore, won't need a paired + // BBJ_ALWAYS); see isBBCallAlwaysPair(). + BBF_LOOP_PREHEADER = MAKE_BBFLAG(26), // BB is a loop preheader block + BBF_COLD = MAKE_BBFLAG(27), // BB is cold + + BBF_PROF_WEIGHT = MAKE_BBFLAG(28), // BB weight is computed from profile data + BBF_IS_LIR = MAKE_BBFLAG(29), // Set if the basic block contains LIR (as opposed to HIR) + BBF_KEEP_BBJ_ALWAYS = MAKE_BBFLAG(30), // A special BBJ_ALWAYS block, used by EH code generation. Keep the jump kind + // as BBJ_ALWAYS. Used for the paired BBJ_ALWAYS block following the + // BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a + // finally. + BBF_CLONED_FINALLY_BEGIN = MAKE_BBFLAG(31), // First block of a cloned finally region + + BBF_CLONED_FINALLY_END = MAKE_BBFLAG(32), // Last block of a cloned finally region + BBF_HAS_CALL = MAKE_BBFLAG(33), // BB contains a call BBF_DOMINATED_BY_EXCEPTIONAL_ENTRY = MAKE_BBFLAG(34), // Block is dominated by exceptional entry. 
- BBF_BACKWARD_JUMP_TARGET = MAKE_BBFLAG(35), // Block is a target of a backward jump + BBF_BACKWARD_JUMP_TARGET = MAKE_BBFLAG(35), // Block is a target of a backward jump - BBF_PATCHPOINT = MAKE_BBFLAG(36), // Block is a patchpoint - BBF_HAS_CLASS_PROFILE = MAKE_BBFLAG(37), // BB contains a call needing a class profile - BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(38), // Block is a partial compilation patchpoint - BBF_HAS_ALIGN = MAKE_BBFLAG(39), // BB ends with 'align' instruction - BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(40), // BB has pred that has potential tail call + BBF_PATCHPOINT = MAKE_BBFLAG(36), // Block is a patchpoint + BBF_HAS_HISTOGRAM_PROFILE = MAKE_BBFLAG(37), // BB contains a call needing a histogram profile + BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(38), // Block is a partial compilation patchpoint + BBF_HAS_ALIGN = MAKE_BBFLAG(39), // BB ends with 'align' instruction + BBF_TAILCALL_SUCCESSOR = MAKE_BBFLAG(40), // BB has pred that has potential tail call - BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(41), // Block is a source of a backward jump + BBF_BACKWARD_JUMP_SOURCE = MAKE_BBFLAG(41), // Block is a source of a backward jump // The following are sets of flags. @@ -582,7 +582,7 @@ enum BasicBlockFlags : unsigned __int64 // TODO: Should BBF_RUN_RARELY be added to BBF_SPLIT_GAINED ? BBF_SPLIT_GAINED = BBF_DONT_REMOVE | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | BBF_PROF_WEIGHT | \ - BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_CLASS_PROFILE, + BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END | BBF_HAS_NULLCHECK | BBF_HAS_HISTOGRAM_PROFILE, }; inline constexpr BasicBlockFlags operator ~(BasicBlockFlags a) @@ -918,8 +918,8 @@ struct BasicBlock : private LIR::Range }; union { - unsigned bbStkTempsOut; // base# for output stack temps - int bbClassSchemaIndex; // schema index for class instrumentation + unsigned bbStkTempsOut; // base# for output stack temps + int bbHistogramSchemaIndex; // schema index for histogram instrumentation }; #define MAX_XCPTN_INDEX (USHRT_MAX - 1) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 652d560409c0f5..30515914f0480d 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6384,10 +6384,10 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, compHndBBtabCount = 0; compHndBBtabAllocCount = 0; - info.compNativeCodeSize = 0; - info.compTotalHotCodeSize = 0; - info.compTotalColdCodeSize = 0; - info.compClassProbeCount = 0; + info.compNativeCodeSize = 0; + info.compTotalHotCodeSize = 0; + info.compTotalColdCodeSize = 0; + info.compHandleHistogramProbeCount = 0; compHasBackwardJump = false; compHasBackwardJumpInHandler = false; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index a7ece7e80e9cd3..54abb9b8bd3e33 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1819,6 +1819,7 @@ class Compiler friend class MorphInitBlockHelper; friend class MorphCopyBlockHelper; friend class CallArgs; + friend class IndirectCallTransformer; #ifdef FEATURE_HW_INTRINSICS friend struct HWIntrinsicInfo; @@ -3537,6 +3538,18 @@ class Compiler bool isExplicitTailCall, IL_OFFSET ilOffset = BAD_IL_OFFSET); + bool impConsiderCallProbe(GenTreeCall* call, IL_OFFSET ilOffset); + + enum class GDVProbeType + { + None, + ClassProfile, + MethodProfile, + MethodAndClassProfile, + }; + + GDVProbeType compClassifyGDVProbeType(GenTreeCall* call); + 
 //=========================================================================
 // PROTECTED
 //=========================================================================
@@ -5425,7 +5438,7 @@ class Compiler
     bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, weight_t* weight);
 
     Instrumentor* fgCountInstrumentor;
-    Instrumentor* fgClassInstrumentor;
+    Instrumentor* fgHistogramInstrumentor;
 
     PhaseStatus fgPrepareToInstrumentMethod();
     PhaseStatus fgInstrumentMethod();
@@ -5433,11 +5446,13 @@ class Compiler
     void fgIncorporateBlockCounts();
     void fgIncorporateEdgeCounts();
 
-    CORINFO_CLASS_HANDLE getRandomClass(ICorJitInfo::PgoInstrumentationSchema* schema,
-                                        UINT32 countSchemaItems,
-                                        BYTE* pInstrumentationData,
-                                        int32_t ilOffset,
-                                        CLRRandom* random);
+    void getRandomGDV(ICorJitInfo::PgoInstrumentationSchema* schema,
+                      UINT32 countSchemaItems,
+                      BYTE* pInstrumentationData,
+                      int32_t ilOffset,
+                      CLRRandom* random,
+                      CORINFO_CLASS_HANDLE* classGuess,
+                      CORINFO_METHOD_HANDLE* methodGuess);
 
 public:
     const char* fgPgoFailReason;
@@ -5677,6 +5692,7 @@ class Compiler
                                             Statement* paramAssignmentInsertionPoint);
     GenTree* fgMorphCall(GenTreeCall* call);
     GenTree* fgExpandVirtualVtableCallTarget(GenTreeCall* call);
+    void fgMorphCallInline(GenTreeCall* call, InlineResult* result);
     void fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result, InlineContext** createdContext);
 #if DEBUG
@@ -6795,13 +6811,21 @@ class Compiler
         optMethodFlags |= OMF_HAS_GUARDEDDEVIRT;
     }
 
+    void pickGDV(GenTreeCall* call,
+                 IL_OFFSET ilOffset,
+                 bool isInterface,
+                 CORINFO_CLASS_HANDLE* classGuess,
+                 CORINFO_METHOD_HANDLE* methodGuess,
+                 unsigned* likelihood);
+
     void considerGuardedDevirtualization(GenTreeCall* call,
                                          IL_OFFSET ilOffset,
                                          bool isInterface,
                                          CORINFO_METHOD_HANDLE baseMethod,
                                          CORINFO_CLASS_HANDLE baseClass,
-                                         CORINFO_CONTEXT_HANDLE* pContextHandle DEBUGARG(CORINFO_CLASS_HANDLE objClass)
-                                             DEBUGARG(const char* objClassName));
+                                         CORINFO_CONTEXT_HANDLE* pContextHandle);
+
+    bool isCompatibleMethodGDV(GenTreeCall* call, CORINFO_METHOD_HANDLE gdvTarget);
 
     void addGuardedDevirtualizationCandidate(GenTreeCall* call,
                                              CORINFO_METHOD_HANDLE methodHandle,
@@ -9537,7 +9561,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     unsigned genCPU; // What CPU are we running on
 
     // Number of class profile probes in this method
-    unsigned compClassProbeCount;
+    unsigned compHandleHistogramProbeCount;
 
 } info;
 
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 148dc02af96493..541bdc27e7c962 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -186,7 +186,7 @@ void Compiler::fgInit()
     fgPgoInlineeNoPgo = 0;
     fgPgoInlineeNoPgoSingleBlock = 0;
     fgCountInstrumentor = nullptr;
-    fgClassInstrumentor = nullptr;
+    fgHistogramInstrumentor = nullptr;
     fgPredListSortVector = nullptr;
 }
 
diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp
index bfd7000486c8bd..4eb555a65ca94e 100644
--- a/src/coreclr/jit/fgprofile.cpp
+++ b/src/coreclr/jit/fgprofile.cpp
@@ -1425,11 +1425,11 @@ void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schem
 }
 
 //------------------------------------------------------------------------
-// ClassProbeVisitor: invoke functor on each virtual call or cast-related
+// HandleHistogramProbeVisitor: invoke functor on each virtual call or cast-related
 // helper calls in a tree
 //
 template <class TFunctor>
-class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>>
+class HandleHistogramProbeVisitor final : public GenTreeVisitor<HandleHistogramProbeVisitor<TFunctor>>
 {
 public:
     enum
     {
@@ -1440,26 +1440,17 @@ class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>>
     TFunctor& m_functor;
     Compiler* m_compiler;
 
-    ClassProbeVisitor(Compiler* compiler, TFunctor& functor)
-        : GenTreeVisitor<ClassProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
+    HandleHistogramProbeVisitor(Compiler* compiler, TFunctor& functor)
+        : GenTreeVisitor<HandleHistogramProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
     {
     }
     Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
     {
         GenTree* const node = *use;
-        if (node->IsCall() && (node->AsCall()->gtClassProfileCandidateInfo != nullptr))
+        if (node->IsCall() && (m_compiler->compClassifyGDVProbeType(node->AsCall()) != Compiler::GDVProbeType::None))
         {
-            GenTreeCall* const call = node->AsCall();
-            if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT))
-            {
-                // virtual call
-                m_functor(m_compiler, call);
-            }
-            else if (m_compiler->impIsCastHelperEligibleForClassProbe(call))
-            {
-                // isinst/cast helper
-                m_functor(m_compiler, call);
-            }
+            assert(node->AsCall()->gtHandleHistogramProfileCandidateInfo != nullptr);
+            m_functor(m_compiler, node->AsCall());
         }
 
         return Compiler::WALK_CONTINUE;
@@ -1467,44 +1458,65 @@ class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>>
 };
 
 //------------------------------------------------------------------------
-// BuildClassProbeSchemaGen: functor that creates class probe schema elements
+// BuildHandleHistogramProbeSchemaGen: functor that creates class probe schema elements
 //
-class BuildClassProbeSchemaGen
+class BuildHandleHistogramProbeSchemaGen
 {
 private:
     Schema&   m_schema;
     unsigned& m_schemaCount;
 
 public:
-    BuildClassProbeSchemaGen(Schema& schema, unsigned& schemaCount) : m_schema(schema), m_schemaCount(schemaCount)
+    BuildHandleHistogramProbeSchemaGen(Schema& schema, unsigned& schemaCount)
+        : m_schema(schema), m_schemaCount(schemaCount)
     {
     }
 
     void operator()(Compiler* compiler, GenTreeCall* call)
     {
-        ICorJitInfo::PgoInstrumentationSchema schemaElem;
-        schemaElem.Count = 1;
-        schemaElem.Other = ICorJitInfo::ClassProfile32::CLASS_FLAG;
+        Compiler::GDVProbeType probeType = compiler->compClassifyGDVProbeType(call);
+
+        if ((probeType == Compiler::GDVProbeType::ClassProfile) ||
+            (probeType == Compiler::GDVProbeType::MethodAndClassProfile))
+        {
+            CreateHistogramSchemaEntries(compiler, call, true /* isTypeHistogram */);
+        }
+
+        if ((probeType == Compiler::GDVProbeType::MethodProfile) ||
+            (probeType == Compiler::GDVProbeType::MethodAndClassProfile))
+        {
+            CreateHistogramSchemaEntries(compiler, call, false /* isTypeHistogram */);
+        }
+    }
+
+    void CreateHistogramSchemaEntries(Compiler* compiler, GenTreeCall* call, bool isTypeHistogram)
+    {
+        ICorJitInfo::PgoInstrumentationSchema schemaElem = {};
+        schemaElem.Count = 1;
+        schemaElem.Other = isTypeHistogram ? ICorJitInfo::HandleHistogram32::CLASS_FLAG : 0;
         if (call->IsVirtualStub())
         {
-            schemaElem.Other |= ICorJitInfo::ClassProfile32::INTERFACE_FLAG;
+            schemaElem.Other |= ICorJitInfo::HandleHistogram32::INTERFACE_FLAG;
         }
-        else
+        else if (call->IsDelegateInvoke())
         {
-            assert(call->IsVirtualVtable() || compiler->impIsCastHelperEligibleForClassProbe(call));
+            schemaElem.Other |= ICorJitInfo::HandleHistogram32::DELEGATE_FLAG;
        }
 
         schemaElem.InstrumentationKind = JitConfig.JitCollect64BitCounts()
                                              ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount
                                              : ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
-        schemaElem.ILOffset = (int32_t)call->gtClassProfileCandidateInfo->ilOffset;
+        schemaElem.ILOffset = (int32_t)call->gtHandleHistogramProfileCandidateInfo->ilOffset;
         schemaElem.Offset = 0;
 
         m_schema.push_back(schemaElem);
 
+        m_schemaCount++;
+
         // Re-using ILOffset and Other fields from schema item for TypeHandleHistogramCount
-        schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes;
-        schemaElem.Count = ICorJitInfo::ClassProfile32::SIZE;
+        schemaElem.InstrumentationKind = isTypeHistogram ?
ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes + : ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods; + schemaElem.Count = ICorJitInfo::HandleHistogram32::SIZE; m_schema.push_back(schemaElem); m_schemaCount++; @@ -1512,9 +1524,9 @@ class BuildClassProbeSchemaGen }; //------------------------------------------------------------------------ -// ClassProbeInserter: functor that adds class probe instrumentation +// HandleHistogramProbeInserter: functor that adds class/method probe instrumentation // -class ClassProbeInserter +class HandleHistogramProbeInserter { Schema& m_schema; uint8_t* m_profileMemory; @@ -1522,7 +1534,7 @@ class ClassProbeInserter unsigned& m_instrCount; public: - ClassProbeInserter(Schema& schema, uint8_t* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount) + HandleHistogramProbeInserter(Schema& schema, uint8_t* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount) : m_schema(schema) , m_profileMemory(profileMemory) , m_currentSchemaIndex(pCurrentSchemaIndex) @@ -1533,10 +1545,11 @@ class ClassProbeInserter void operator()(Compiler* compiler, GenTreeCall* call) { JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", compiler->dspTreeID(call), - call->gtClassProfileCandidateInfo->probeIndex, call->gtClassProfileCandidateInfo->ilOffset); + call->gtHandleHistogramProfileCandidateInfo->probeIndex, + call->gtHandleHistogramProfileCandidateInfo->ilOffset); // We transform the call from (CALLVIRT obj, ... args ...) to - // to + // // (CALLVIRT // (COMMA // (ASG tmp, obj) @@ -1546,19 +1559,25 @@ class ClassProbeInserter // ... args ...) // - // Sanity check that we're looking at the right schema entry - // - assert(m_schema[*m_currentSchemaIndex].ILOffset == (int32_t)call->gtClassProfileCandidateInfo->ilOffset); - bool is32 = m_schema[*m_currentSchemaIndex].InstrumentationKind == - ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount; - bool is64 = m_schema[*m_currentSchemaIndex].InstrumentationKind == - ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount; - assert(is32 || is64); - - // Figure out where the table is located. - // - uint8_t* classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory; - *m_currentSchemaIndex += 2; // There are 2 schema entries per class probe + // Read histograms + void* typeHistogram = nullptr; + void* methodHistogram = nullptr; + + bool is32; + ReadHistogramAndAdvance(call->gtHandleHistogramProfileCandidateInfo->ilOffset, &typeHistogram, &methodHistogram, + &is32); + bool secondIs32; + ReadHistogramAndAdvance(call->gtHandleHistogramProfileCandidateInfo->ilOffset, &typeHistogram, &methodHistogram, + &secondIs32); + + assert(((typeHistogram != nullptr) || (methodHistogram != nullptr)) && + "Expected at least one handle histogram when inserting probes"); + + if ((typeHistogram != nullptr) && (methodHistogram != nullptr)) + { + // We expect both histograms to be 32-bit or 64-bit, not a mix. 
+ assert(is32 == secondIs32); + } assert(!call->gtArgs.AreArgsComplete()); CallArg* objUse = nullptr; @@ -1576,20 +1595,57 @@ class ClassProbeInserter // Grab a temp to hold the 'this' object as it will be used three times // - unsigned const tmpNum = compiler->lvaGrabTemp(true DEBUGARG("class profile tmp")); + unsigned const tmpNum = compiler->lvaGrabTemp(true DEBUGARG("handle histogram profile tmp")); compiler->lvaTable[tmpNum].lvType = TYP_REF; + GenTree* helperCallNode = nullptr; + + if (typeHistogram != nullptr) + { + GenTree* const tmpNode = compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const classProfileNode = compiler->gtNewIconNode((ssize_t)typeHistogram, TYP_I_IMPL); + helperCallNode = + compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_CLASSPROFILE32 : CORINFO_HELP_CLASSPROFILE64, + TYP_VOID, tmpNode, classProfileNode); + } + + if (methodHistogram != nullptr) + { + GenTree* const tmpNode = compiler->gtNewLclvNode(tmpNum, TYP_REF); + GenTree* const methodProfileNode = compiler->gtNewIconNode((ssize_t)methodHistogram, TYP_I_IMPL); + + GenTree* methodProfileCallNode; + if (call->IsDelegateInvoke()) + { + methodProfileCallNode = compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_DELEGATEPROFILE32 + : CORINFO_HELP_DELEGATEPROFILE64, + TYP_VOID, tmpNode, methodProfileNode); + } + else + { + assert(call->IsVirtualVtable()); + GenTree* const baseMethodNode = compiler->gtNewIconEmbMethHndNode(call->gtCallMethHnd); + methodProfileCallNode = + compiler->gtNewHelperCallNode(is32 ? CORINFO_HELP_VTABLEPROFILE32 : CORINFO_HELP_VTABLEPROFILE64, + TYP_VOID, tmpNode, baseMethodNode, methodProfileNode); + } + + if (helperCallNode == nullptr) + { + helperCallNode = methodProfileCallNode; + } + else + { + helperCallNode = compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, methodProfileCallNode); + } + } + // Generate the IR... // - GenTree* const classProfileNode = compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL); - GenTree* const tmpNode = compiler->gtNewLclvNode(tmpNum, TYP_REF); - GenTreeCall* const helperCallNode = - compiler->gtNewHelperCallNode(is32 ? 
CORINFO_HELP_CLASSPROFILE32 : CORINFO_HELP_CLASSPROFILE64, TYP_VOID,
-                                          tmpNode, classProfileNode);
         GenTree* const tmpNode2 = compiler->gtNewLclvNode(tmpNum, TYP_REF);
         GenTree* const callCommaNode = compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2);
         GenTree* const tmpNode3 = compiler->gtNewLclvNode(tmpNum, TYP_REF);
-        GenTree* const asgNode = compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, objUse->GetEarlyNode());
+        GenTree* const asgNode = compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, objUse->GetNode());
         GenTree* const asgCommaNode = compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode);
 
         // Update the call
@@ -1601,16 +1657,78 @@ class ClassProbeInserter
         m_instrCount++;
     }
+
+private:
+    void ReadHistogramAndAdvance(IL_OFFSET ilOffset, void** typeHistogram, void** methodHistogram, bool* histogramIs32)
+    {
+        if (*m_currentSchemaIndex >= (int)m_schema.size())
+        {
+            return;
+        }
+
+        ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex];
+
+        bool is32 = countEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount;
+        bool is64 = countEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount;
+        if (!is32 && !is64)
+        {
+            return;
+        }
+
+        if (countEntry.ILOffset != static_cast<int32_t>(ilOffset))
+        {
+            return;
+        }
+
+        assert(*m_currentSchemaIndex + 2 <= (int)m_schema.size());
+        ICorJitInfo::PgoInstrumentationSchema& tableEntry = m_schema[*m_currentSchemaIndex + 1];
+        assert((tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes) ||
+               (tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods));
+
+        void** outHistogram;
+        if (tableEntry.InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes)
+        {
+            assert(*typeHistogram == nullptr);
+            outHistogram = typeHistogram;
+        }
+        else
+        {
+            assert(*methodHistogram == nullptr);
+            outHistogram = methodHistogram;
+        }
+
+        *outHistogram = &m_profileMemory[countEntry.Offset];
+        *histogramIs32 = is32;
+
+#ifdef DEBUG
+        if (is32)
+        {
+            ICorJitInfo::HandleHistogram32* h32 =
+                reinterpret_cast<ICorJitInfo::HandleHistogram32*>(&m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(&h32->Count) == &m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(h32->HandleTable) == &m_profileMemory[tableEntry.Offset]);
+        }
+        else
+        {
+            ICorJitInfo::HandleHistogram64* h64 =
+                reinterpret_cast<ICorJitInfo::HandleHistogram64*>(&m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(&h64->Count) == &m_profileMemory[countEntry.Offset]);
+            assert(reinterpret_cast<uint8_t*>(h64->HandleTable) == &m_profileMemory[tableEntry.Offset]);
+        }
+#endif
+
+        *m_currentSchemaIndex += 2;
+    }
 };
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor: instrumentor that adds a class probe to each
+// HandleHistogramProbeInstrumentor: instrumentor that adds a class probe to each
 // virtual call in the basic block
 //
-class ClassProbeInstrumentor : public Instrumentor
+class HandleHistogramProbeInstrumentor : public Instrumentor
 {
 public:
-    ClassProbeInstrumentor(Compiler* comp) : Instrumentor(comp)
+    HandleHistogramProbeInstrumentor(Compiler* comp) : Instrumentor(comp)
     {
     }
     bool ShouldProcess(BasicBlock* block) override
@@ -1623,13 +1741,13 @@ class ClassProbeInstrumentor : public Instrumentor
 };
 
 //------------------------------------------------------------------------
-// ClassProbeInstrumentor::Prepare: prepare for class instrumentation
+// HandleHistogramProbeInstrumentor::Prepare:
prepare for class instrumentation // // Arguments: // preImport - true if this is the prepare call that happens before // importation // -void ClassProbeInstrumentor::Prepare(bool isPreImport) +void HandleHistogramProbeInstrumentor::Prepare(bool isPreImport) { if (isPreImport) { @@ -1641,33 +1759,33 @@ void ClassProbeInstrumentor::Prepare(bool isPreImport) // for (BasicBlock* const block : m_comp->Blocks()) { - block->bbClassSchemaIndex = -1; + block->bbHistogramSchemaIndex = -1; } #endif } //------------------------------------------------------------------------ -// ClassProbeInstrumentor::BuildSchemaElements: create schema elements for a class probe +// HandleHistogramProbeInstrumentor::BuildSchemaElements: create schema elements for a class probe // // Arguments: // block -- block to instrument // schema -- schema that we're building // -void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& schema) +void HandleHistogramProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& schema) { - if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0) + if ((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0) { return; } // Remember the schema index for this block. // - block->bbClassSchemaIndex = (int)schema.size(); + block->bbHistogramSchemaIndex = (int)schema.size(); // Scan the statements and identify the class probes // - BuildClassProbeSchemaGen schemaGen(schema, m_schemaCount); - ClassProbeVisitor visitor(m_comp, schemaGen); + BuildHandleHistogramProbeSchemaGen schemaGen(schema, m_schemaCount); + HandleHistogramProbeVisitor visitor(m_comp, schemaGen); for (Statement* const stmt : block->Statements()) { visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); @@ -1675,16 +1793,16 @@ void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche } //------------------------------------------------------------------------ -// ClassProbeInstrumentor::Instrument: add class probes to block +// HandleHistogramProbeInstrumentor::Instrument: add class probes to block // // Arguments: // block -- block of interest // schema -- instrumentation schema // profileMemory -- profile data slab // -void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory) +void HandleHistogramProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory) { - if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0) + if ((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0) { return; } @@ -1696,11 +1814,11 @@ void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8 // Scan the statements and add class probes // - int classSchemaIndex = block->bbClassSchemaIndex; - assert((classSchemaIndex >= 0) && (classSchemaIndex < (int)schema.size())); + int histogramSchemaIndex = block->bbHistogramSchemaIndex; + assert((histogramSchemaIndex >= 0) && (histogramSchemaIndex < (int)schema.size())); - ClassProbeInserter insertProbes(schema, profileMemory, &classSchemaIndex, m_instrCount); - ClassProbeVisitor visitor(m_comp, insertProbes); + HandleHistogramProbeInserter insertProbes(schema, profileMemory, &histogramSchemaIndex, m_instrCount); + HandleHistogramProbeVisitor visitor(m_comp, insertProbes); for (Statement* const stmt : block->Statements()) { visitor.WalkTree(stmt->GetRootNodePointer(), nullptr); @@ -1789,24 +1907,25 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod() // Enable class profiling by default, when jitting. // Todo: we may also want this on by default for prejitting. 
// - const bool useClassProfiles = (JitConfig.JitClassProfiling() > 0) && !prejit; - if (useClassProfiles) + const bool useClassProfiles = (JitConfig.JitClassProfiling() > 0); + const bool useDelegateProfiles = (JitConfig.JitDelegateProfiling() > 0); + const bool useVTableProfiles = (JitConfig.JitVTableProfiling() > 0); + if (!prejit && (useClassProfiles || useDelegateProfiles || useVTableProfiles)) { - fgClassInstrumentor = new (this, CMK_Pgo) ClassProbeInstrumentor(this); + fgHistogramInstrumentor = new (this, CMK_Pgo) HandleHistogramProbeInstrumentor(this); } else { - JITDUMP("Not doing class profiling, because %s\n", - (JitConfig.JitClassProfiling() > 0) ? "class profiles disabled" : "prejit"); + JITDUMP("Not doing class/method profiling, because %s\n", prejit ? "prejit" : "class/method profiles disabled"); - fgClassInstrumentor = new (this, CMK_Pgo) NonInstrumentor(this); + fgHistogramInstrumentor = new (this, CMK_Pgo) NonInstrumentor(this); } // Make pre-import preparations. // const bool isPreImport = true; fgCountInstrumentor->Prepare(isPreImport); - fgClassInstrumentor->Prepare(isPreImport); + fgHistogramInstrumentor->Prepare(isPreImport); return PhaseStatus::MODIFIED_NOTHING; } @@ -1835,7 +1954,7 @@ PhaseStatus Compiler::fgInstrumentMethod() // const bool isPreImport = false; fgCountInstrumentor->Prepare(isPreImport); - fgClassInstrumentor->Prepare(isPreImport); + fgHistogramInstrumentor->Prepare(isPreImport); // Walk the flow graph to build up the instrumentation schema. // @@ -1847,27 +1966,12 @@ PhaseStatus Compiler::fgInstrumentMethod() fgCountInstrumentor->BuildSchemaElements(block, schema); } - if (fgClassInstrumentor->ShouldProcess(block)) + if (fgHistogramInstrumentor->ShouldProcess(block)) { - fgClassInstrumentor->BuildSchemaElements(block, schema); + fgHistogramInstrumentor->BuildSchemaElements(block, schema); } } - // Verify we created schema for the calls needing class probes. - // (we counted those when importing) - // - // This is not true when we do partial compilation; it can/will erase class probes, - // and there's no easy way to figure out how many should be left. - // - if (doesMethodHavePartialCompilationPatchpoints()) - { - assert(fgClassInstrumentor->SchemaCount() <= info.compClassProbeCount); - } - else - { - assert(fgClassInstrumentor->SchemaCount() == info.compClassProbeCount); - } - // Optionally, when jitting, if there were no class probes and only one count probe, // suppress instrumentation. // @@ -1887,7 +1991,7 @@ PhaseStatus Compiler::fgInstrumentMethod() minimalProbeMode = (JitConfig.JitMinimalJitProfiling() > 0); } - if (minimalProbeMode && (fgCountInstrumentor->SchemaCount() == 1) && (fgClassInstrumentor->SchemaCount() == 0)) + if (minimalProbeMode && (fgCountInstrumentor->SchemaCount() == 1) && (fgHistogramInstrumentor->SchemaCount() == 0)) { JITDUMP( "Not instrumenting method: minimal probing enabled, and method has only one counter and no class probes\n"); @@ -1895,7 +1999,7 @@ PhaseStatus Compiler::fgInstrumentMethod() } JITDUMP("Instrumenting method: %d count probes and %d class probes\n", fgCountInstrumentor->SchemaCount(), - fgClassInstrumentor->SchemaCount()); + fgHistogramInstrumentor->SchemaCount()); assert(schema.size() > 0); @@ -1928,7 +2032,7 @@ PhaseStatus Compiler::fgInstrumentMethod() // Do any cleanup we might need to do... // fgCountInstrumentor->SuppressProbes(); - fgClassInstrumentor->SuppressProbes(); + fgHistogramInstrumentor->SuppressProbes(); // If we needed to create cheap preds, we're done with them now. 
// @@ -1939,7 +2043,7 @@ PhaseStatus Compiler::fgInstrumentMethod() // We may have modified control flow preparing for instrumentation. // - const bool modifiedFlow = fgCountInstrumentor->ModifiedFlow() || fgClassInstrumentor->ModifiedFlow(); + const bool modifiedFlow = fgCountInstrumentor->ModifiedFlow() || fgHistogramInstrumentor->ModifiedFlow(); return modifiedFlow ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } @@ -1954,22 +2058,25 @@ PhaseStatus Compiler::fgInstrumentMethod() fgCountInstrumentor->Instrument(block, schema, profileMemory); } - if (fgClassInstrumentor->ShouldProcess(block)) + if (fgHistogramInstrumentor->ShouldProcess(block)) { - fgClassInstrumentor->Instrument(block, schema, profileMemory); + fgHistogramInstrumentor->Instrument(block, schema, profileMemory); } } // Verify we instrumented everthing we created schemas for. // assert(fgCountInstrumentor->InstrCount() == fgCountInstrumentor->SchemaCount()); - assert(fgClassInstrumentor->InstrCount() == fgClassInstrumentor->SchemaCount()); + + // Verify we instrumented for each probe + // + assert(fgHistogramInstrumentor->InstrCount() == info.compHandleHistogramProbeCount); // Add any special entry instrumentation. This does not // use the schema mechanism. // fgCountInstrumentor->InstrumentMethodEntry(schema, profileMemory); - fgClassInstrumentor->InstrumentMethodEntry(schema, profileMemory); + fgHistogramInstrumentor->InstrumentMethodEntry(schema, profileMemory); // If we needed to create cheap preds, we're done with them now. // @@ -2052,6 +2159,10 @@ PhaseStatus Compiler::fgIncorporateProfileData() fgPgoClassProfiles++; break; + case ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod: + fgPgoMethodProfiles++; + break; + case ICorJitInfo::PgoInstrumentationKind::HandleHistogramIntCount: case ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount: if (iSchema + 1 < fgPgoSchemaCount) diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index ff72fae3378d16..4bca92ad402577 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -150,7 +150,7 @@ struct BasicBlock; enum BasicBlockFlags : unsigned __int64; struct InlineCandidateInfo; struct GuardedDevirtualizationCandidateInfo; -struct ClassProfileCandidateInfo; +struct HandleHistogramProfileCandidateInfo; struct LateDevirtualizationInfo; typedef unsigned short AssertionIndex; @@ -5386,7 +5386,7 @@ struct GenTreeCall final : public GenTree // gtInlineCandidateInfo is only used when inlining methods InlineCandidateInfo* gtInlineCandidateInfo; GuardedDevirtualizationCandidateInfo* gtGuardedDevirtualizationCandidateInfo; - ClassProfileCandidateInfo* gtClassProfileCandidateInfo; + HandleHistogramProfileCandidateInfo* gtHandleHistogramProfileCandidateInfo; LateDevirtualizationInfo* gtLateDevirtualizationInfo; CORINFO_GENERIC_HANDLE compileTimeHelperArgumentHandle; // Used to track type handle argument of dynamic helpers void* gtDirectCallAddress; // Used to pass direct call address between lower and codegen diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 64be5105099f8f..e1015115da7e96 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -10070,11 +10070,23 @@ var_types Compiler::impImportCall(OPCODE opcode, call->gtFlags |= obj->gtFlags & GTF_GLOB_EFFECT; call->AsCall()->gtArgs.PushFront(this, NewCallArg::Primitive(obj).WellKnown(WellKnownArg::ThisPointer)); - // Is this a virtual or interface call? 
+ if (impIsThis(obj)) + { + call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS; + } + } + + bool probing; + probing = impConsiderCallProbe(call->AsCall(), rawILOffset); + + // See if we can devirt if we aren't probing. + if (!probing && opts.OptimizationEnabled()) + { if (call->AsCall()->IsVirtual()) { // only true object pointers can be virtual - assert(obj->gtType == TYP_REF); + assert(call->AsCall()->gtArgs.HasThisPointer() && + call->AsCall()->gtArgs.GetThisArg()->GetNode()->TypeIs(TYP_REF)); // See if we can devirtualize. @@ -10090,10 +10102,10 @@ var_types Compiler::impImportCall(OPCODE opcode, // methHnd = callInfo->hMethod; } - - if (impIsThis(obj)) + else if (call->AsCall()->IsDelegateInvoke()) { - call->AsCall()->gtCallMoreFlags |= GTF_CALL_M_NONVIRT_SAME_THIS; + considerGuardedDevirtualization(call->AsCall(), rawILOffset, false, NO_METHOD_HANDLE, NO_CLASS_HANDLE, + nullptr); } } @@ -10530,7 +10542,7 @@ var_types Compiler::impImportCall(OPCODE opcode, // important devirtualizations, we'll want to allow both a class probe and a captured context. // if (origCall->IsVirtual() && (origCall->gtCallType != CT_INDIRECT) && (exactContextHnd != nullptr) && - (origCall->gtClassProfileCandidateInfo == nullptr)) + (origCall->gtHandleHistogramProfileCandidateInfo == nullptr)) { JITDUMP("\nSaving context %p for call [%06u]\n", exactContextHnd, dspTreeID(origCall)); origCall->gtCallMoreFlags |= GTF_CALL_M_HAS_LATE_DEVIRT_INFO; @@ -12140,10 +12152,10 @@ GenTree* Compiler::impCastClassOrIsInstToTree( // Check if this cast helper have some profile data if (impIsCastHelperMayHaveProfileData(helper)) { - bool doRandomDevirt = false; - const int maxLikelyClasses = 32; - int likelyClassCount = 0; - LikelyClassRecord likelyClasses[maxLikelyClasses]; + bool doRandomDevirt = false; + const int maxLikelyClasses = 32; + int likelyClassCount = 0; + LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; #ifdef DEBUG // Optional stress mode to pick a random known class, rather than // the most likely known class. @@ -12154,11 +12166,14 @@ GenTree* Compiler::impCastClassOrIsInstToTree( // Reuse the random inliner's random state. 
CLRRandom* const random = impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization()); - likelyClasses[0].clsHandle = getRandomClass(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random); - likelyClasses[0].likelihood = 100; - if (likelyClasses[0].clsHandle != NO_CLASS_HANDLE) + CORINFO_CLASS_HANDLE clsGuess; + CORINFO_METHOD_HANDLE methGuess; + getRandomGDV(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random, &clsGuess, &methGuess); + if (clsGuess != NO_CLASS_HANDLE) { - likelyClassCount = 1; + likelyClasses[0].likelihood = 100; + likelyClasses[0].handle = (intptr_t)clsGuess; + likelyClassCount = 1; } } else @@ -12170,8 +12185,8 @@ GenTree* Compiler::impCastClassOrIsInstToTree( if (likelyClassCount > 0) { - LikelyClassRecord likelyClass = likelyClasses[0]; - CORINFO_CLASS_HANDLE likelyCls = likelyClass.clsHandle; + LikelyClassMethodRecord likelyClass = likelyClasses[0]; + CORINFO_CLASS_HANDLE likelyCls = (CORINFO_CLASS_HANDLE)likelyClass.handle; if ((likelyCls != NO_CLASS_HANDLE) && (likelyClass.likelihood > (UINT32)JitConfig.JitGuardedDevirtualizationChainLikelihood())) @@ -12206,13 +12221,14 @@ GenTree* Compiler::impCastClassOrIsInstToTree( op2->gtFlags |= GTF_DONT_CSE; GenTreeCall* call = gtNewHelperCallNode(helper, TYP_REF, op2, op1); - if (impIsCastHelperEligibleForClassProbe(call) && !impIsClassExact(pResolvedToken->hClass)) + if ((JitConfig.JitClassProfiling() > 0) && impIsCastHelperEligibleForClassProbe(call) && + !impIsClassExact(pResolvedToken->hClass)) { - ClassProfileCandidateInfo* pInfo = new (this, CMK_Inlining) ClassProfileCandidateInfo; - pInfo->ilOffset = ilOffset; - pInfo->probeIndex = info.compClassProbeCount++; - call->gtClassProfileCandidateInfo = pInfo; - compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE; + HandleHistogramProfileCandidateInfo* pInfo = new (this, CMK_Inlining) HandleHistogramProfileCandidateInfo; + pInfo->ilOffset = ilOffset; + pInfo->probeIndex = info.compHandleHistogramProbeCount++; + call->gtHandleHistogramProfileCandidateInfo = pInfo; + compCurBB->bbFlags |= BBF_HAS_HISTOGRAM_PROFILE; } return call; } @@ -21004,7 +21020,7 @@ void Compiler::impMarkInlineCandidateHelper(GenTreeCall* call, // Delegate Invoke method doesn't have a body and gets special cased instead. // Don't even bother trying to inline it. - if (call->IsDelegateInvoke()) + if (call->IsDelegateInvoke() && !call->IsGuardedDevirtualizationCandidate()) { inlineResult.NoteFatal(InlineObservation::CALLEE_HAS_NO_BODY); return; @@ -21389,51 +21405,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // This should be a virtual vtable or virtual stub call. // assert(call->IsVirtual()); - - // Possibly instrument. Note for OSR+PGO we will instrument when - // optimizing and (currently) won't devirtualize. We may want - // to revisit -- if we can devirtualize we should be able to - // suppress the probe. - // - // We strip BBINSTR from inlinees currently, so we'll only - // do this for the root method calls. - // - if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR)) - { - assert(opts.OptimizationDisabled() || opts.IsOSR()); - assert(!compIsForInlining()); - - // During importation, optionally flag this block as one that - // contains calls requiring class profiling. Ideally perhaps - // we'd just keep track of the calls themselves, so we don't - // have to search for them later. 
- // - if ((call->gtCallType != CT_INDIRECT) && opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) && - !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) && (JitConfig.JitClassProfiling() > 0) && - !isLateDevirtualization) - { - JITDUMP("\n ... marking [%06u] in " FMT_BB " for class profile instrumentation\n", dspTreeID(call), - compCurBB->bbNum); - ClassProfileCandidateInfo* pInfo = new (this, CMK_Inlining) ClassProfileCandidateInfo; - - // Record some info needed for the class profiling probe. - // - pInfo->ilOffset = ilOffset; - pInfo->probeIndex = info.compClassProbeCount++; - call->gtClassProfileCandidateInfo = pInfo; - - // Flag block as needing scrutiny - // - compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE; - } - return; - } - - // Bail if optimizations are disabled. - if (opts.OptimizationDisabled()) - { - return; - } + assert(opts.OptimizationEnabled()); #if defined(DEBUG) // Bail if devirt is disabled. @@ -21525,8 +21497,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, - pContextHandle DEBUGARG(objClass) DEBUGARG("unknown")); + considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle); return; } @@ -21576,8 +21547,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, - pContextHandle DEBUGARG(objClass) DEBUGARG(objClassName)); + considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle); return; } @@ -21693,8 +21663,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, return; } - considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, - pContextHandle DEBUGARG(objClass) DEBUGARG(objClassName)); + considerGuardedDevirtualization(call, ilOffset, isInterface, baseMethod, baseClass, pContextHandle); return; } @@ -21714,6 +21683,7 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, call->gtFlags &= ~GTF_CALL_VIRT_STUB; call->gtCallMethHnd = derivedMethod; call->gtCallType = CT_USER_FUNC; + call->gtControlExpr = nullptr; call->gtCallMoreFlags |= GTF_CALL_M_DEVIRTUALIZED; // Virtual calls include an implicit null check, which we may @@ -21755,14 +21725,14 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, if (JitConfig.JitCrossCheckDevirtualizationAndPGO() && canSensiblyCheck) { // We only can handle a single likely class for now - const int maxLikelyClasses = 1; - LikelyClassRecord likelyClasses[maxLikelyClasses]; + const int maxLikelyClasses = 1; + LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; UINT32 numberOfClasses = getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); UINT32 likelihood = likelyClasses[0].likelihood; - CORINFO_CLASS_HANDLE likelyClass = likelyClasses[0].clsHandle; + CORINFO_CLASS_HANDLE likelyClass = (CORINFO_CLASS_HANDLE)likelyClasses[0].handle; if (numberOfClasses > 0) { @@ -22053,6 +22023,117 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, #endif // FEATURE_READYTORUN } +//------------------------------------------------------------------------ +// impConsiderCallProbe: Consider whether a call should get a histogram probe +// and mark it if so. +// +// Arguments: +// call - The call +// ilOffset - The precise IL offset of the call +// +// Returns: +// True if the call was marked such that we will add a class or method probe for it. 
+// +bool Compiler::impConsiderCallProbe(GenTreeCall* call, IL_OFFSET ilOffset) +{ + // Possibly instrument. Note for OSR+PGO we will instrument when + // optimizing and (currently) won't devirtualize. We may want + // to revisit -- if we can devirtualize we should be able to + // suppress the probe. + // + // We strip BBINSTR from inlinees currently, so we'll only + // do this for the root method calls. + // + if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR)) + { + return false; + } + + assert(opts.OptimizationDisabled() || opts.IsOSR()); + assert(!compIsForInlining()); + + // During importation, optionally flag this block as one that + // contains calls requiring class profiling. Ideally perhaps + // we'd just keep track of the calls themselves, so we don't + // have to search for them later. + // + if (compClassifyGDVProbeType(call) == GDVProbeType::None) + { + return false; + } + + JITDUMP("\n ... marking [%06u] in " FMT_BB " for method/class profile instrumentation\n", dspTreeID(call), + compCurBB->bbNum); + HandleHistogramProfileCandidateInfo* pInfo = new (this, CMK_Inlining) HandleHistogramProfileCandidateInfo; + + // Record some info needed for the class profiling probe. + // + pInfo->ilOffset = ilOffset; + pInfo->probeIndex = info.compHandleHistogramProbeCount++; + call->gtHandleHistogramProfileCandidateInfo = pInfo; + + // Flag block as needing scrutiny + // + compCurBB->bbFlags |= BBF_HAS_HISTOGRAM_PROFILE; + return true; +} + +//------------------------------------------------------------------------ +// compClassifyGDVProbeType: +// Classify the type of GDV probe to use for a call site. +// +// Arguments: +// call - The call +// +// Returns: +// The type of probe to use. +// +Compiler::GDVProbeType Compiler::compClassifyGDVProbeType(GenTreeCall* call) +{ + if (call->gtCallType == CT_INDIRECT) + { + return GDVProbeType::None; + } + + if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) || opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT)) + { + return GDVProbeType::None; + } + + bool createTypeHistogram = false; + if (JitConfig.JitClassProfiling() > 0) + { + createTypeHistogram = call->IsVirtualStub() || call->IsVirtualVtable(); + + // Cast helpers may conditionally (depending on whether the class is + // exact or not) have probes. For those helpers we do not use this + // function to classify the probe type until after we have decided on + // whether we probe them or not. + createTypeHistogram = createTypeHistogram || (impIsCastHelperEligibleForClassProbe(call) && + (call->gtHandleHistogramProfileCandidateInfo != nullptr)); + } + + bool createMethodHistogram = ((JitConfig.JitDelegateProfiling() > 0) && call->IsDelegateInvoke()) || + ((JitConfig.JitVTableProfiling() > 0) && call->IsVirtualVtable()); + + if (createTypeHistogram && createMethodHistogram) + { + return GDVProbeType::MethodAndClassProfile; + } + + if (createTypeHistogram) + { + return GDVProbeType::ClassProfile; + } + + if (createMethodHistogram) + { + return GDVProbeType::MethodProfile; + } + + return GDVProbeType::None; +} + //------------------------------------------------------------------------ // impGetSpecialIntrinsicExactReturnType: Look for special cases where a call // to an intrinsic returns an exact type @@ -22063,7 +22144,6 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call, // Returns: // Exact class handle returned by the intrinsic call, if known. // Nullptr if not known, or not likely to lead to beneficial optimization. 
- CORINFO_CLASS_HANDLE Compiler::impGetSpecialIntrinsicExactReturnType(CORINFO_METHOD_HANDLE methodHnd) { JITDUMP("Special intrinsic: looking for exact type returned by %s\n", eeGetMethodFullName(methodHnd)); @@ -22225,153 +22305,378 @@ void Compiler::addFatPointerCandidate(GenTreeCall* call) } //------------------------------------------------------------------------ -// considerGuardedDevirtualization: see if we can profitably guess at the -// class involved in an interface or virtual call. +// pickGDV: Use profile information to pick a GDV candidate for a call site. // // Arguments: +// call - the call +// ilOffset - exact IL offset of the call +// isInterface - whether or not the call target is defined on an interface +// classGuess - [out] the class to guess for (mutually exclusive with methodGuess) +// methodGuess - [out] the method to guess for (mutually exclusive with classGuess) +// likelihood - [out] an estimate of the likelihood that the guess will succeed // -// call - potential guarded devirtualization candidate -// ilOffset - IL ofset of the call instruction -// isInterface - true if this is an interface call -// baseMethod - target method of the call -// baseClass - class that introduced the target method -// pContextHandle - context handle for the call -// objClass - class of 'this' in the call -// objClassName - name of the obj Class -// -// Notes: -// Consults with VM to see if there's a likely class at runtime, -// if so, adds a candidate for guarded devirtualization. -// -void Compiler::considerGuardedDevirtualization( - GenTreeCall* call, - IL_OFFSET ilOffset, - bool isInterface, - CORINFO_METHOD_HANDLE baseMethod, - CORINFO_CLASS_HANDLE baseClass, - CORINFO_CONTEXT_HANDLE* pContextHandle DEBUGARG(CORINFO_CLASS_HANDLE objClass) DEBUGARG(const char* objClassName)) +void Compiler::pickGDV(GenTreeCall* call, + IL_OFFSET ilOffset, + bool isInterface, + CORINFO_CLASS_HANDLE* classGuess, + CORINFO_METHOD_HANDLE* methodGuess, + unsigned* likelihood) { -#if defined(DEBUG) - const char* callKind = isInterface ? "interface" : "virtual"; -#endif + *classGuess = NO_CLASS_HANDLE; + *methodGuess = NO_METHOD_HANDLE; + *likelihood = 0; - JITDUMP("Considering guarded devirtualization at IL offset %u (0x%x)\n", ilOffset, ilOffset); + const int maxLikelyClasses = 32; + LikelyClassMethodRecord likelyClasses[maxLikelyClasses]; + unsigned numberOfClasses = 0; + if (call->IsVirtualStub() || call->IsVirtualVtable()) + { + numberOfClasses = + getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); + } - // We currently only get likely class guesses when there is PGO data - // with class profiles. + const int maxLikelyMethods = 32; + LikelyClassMethodRecord likelyMethods[maxLikelyMethods]; + unsigned numberOfMethods = 0; + + // TODO-GDV: R2R support requires additional work to reacquire the + // entrypoint, similar to what happens at the end of impDevirtualizeCall. + // As part of supporting this we should merge the tail of + // impDevirtualizeCall and what happens in + // GuardedDevirtualizationTransformer::CreateThen for method GDV. 
// - if (fgPgoClassProfiles == 0) + if (!opts.IsReadyToRun() && (call->IsVirtualVtable() || call->IsDelegateInvoke())) + { + numberOfMethods = + getLikelyMethods(likelyMethods, maxLikelyMethods, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); + } + + if ((numberOfClasses < 1) && (numberOfMethods < 1)) { - JITDUMP("Not guessing for class: no class profile pgo data, or pgo disabled\n"); + JITDUMP("No likely class or method, sorry\n"); return; } - // See if there's a likely guess for the class. - // - const unsigned likelihoodThreshold = isInterface ? 25 : 30; - unsigned likelihood = 0; - unsigned numberOfClasses = 0; +#ifdef DEBUG + if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfClasses > 0)) + { + bool isExact; + bool isNonNull; + CallArg* thisArg = call->gtArgs.GetThisArg(); + CORINFO_CLASS_HANDLE declaredThisClsHnd = gtGetClassHandle(thisArg->GetNode(), &isExact, &isNonNull); + JITDUMP("Likely classes for call [%06u]", dspTreeID(call)); + if (declaredThisClsHnd != NO_CLASS_HANDLE) + { + const char* baseClassName = eeGetClassName(declaredThisClsHnd); + JITDUMP(" on class %p (%s)", declaredThisClsHnd, baseClassName); + } + JITDUMP("\n"); + + for (UINT32 i = 0; i < numberOfClasses; i++) + { + const char* className = eeGetClassName((CORINFO_CLASS_HANDLE)likelyClasses[i].handle); + JITDUMP(" %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].handle, className, + likelyClasses[i].likelihood); + } + } - CORINFO_CLASS_HANDLE likelyClass = NO_CLASS_HANDLE; + if ((verbose || JitConfig.EnableExtraSuperPmiQueries()) && (numberOfMethods > 0)) + { + assert(call->gtCallType == CT_USER_FUNC); + const char* baseMethName = eeGetMethodFullName(call->gtCallMethHnd); + JITDUMP("Likely methods for call [%06u] to method %s\n", dspTreeID(call), baseMethName); - bool doRandomDevirt = false; + for (UINT32 i = 0; i < numberOfMethods; i++) + { + CORINFO_CONST_LOOKUP lookup = {}; + info.compCompHnd->getFunctionFixedEntryPoint((CORINFO_METHOD_HANDLE)likelyMethods[i].handle, false, + &lookup); - const int maxLikelyClasses = 32; - LikelyClassRecord likelyClasses[maxLikelyClasses]; + const char* methName = eeGetMethodFullName((CORINFO_METHOD_HANDLE)likelyMethods[i].handle); + switch (lookup.accessType) + { + case IAT_VALUE: + JITDUMP(" %u) %p (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName, + likelyMethods[i].likelihood); + break; + case IAT_PVALUE: + JITDUMP(" %u) [%p] (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName, + likelyMethods[i].likelihood); + break; + case IAT_PPVALUE: + JITDUMP(" %u) [[%p]] (%s) [likelihood:%u%%]\n", i + 1, lookup.addr, methName, + likelyMethods[i].likelihood); + break; + default: + JITDUMP(" %u) %s [likelihood:%u%%]\n", i + 1, methName, likelyMethods[i].likelihood); + break; + } + } + } -#ifdef DEBUG // Optional stress mode to pick a random known class, rather than // the most likely known class. // - doRandomDevirt = JitConfig.JitRandomGuardedDevirtualization() != 0; - - if (doRandomDevirt) + if (JitConfig.JitRandomGuardedDevirtualization() != 0) { // Reuse the random inliner's random state. // CLRRandom* const random = impInlineRoot()->m_inlineStrategy->GetRandom(JitConfig.JitRandomGuardedDevirtualization()); - likelyClasses[0].clsHandle = getRandomClass(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random); - likelyClasses[0].likelihood = 100; - if (likelyClasses[0].clsHandle != NO_CLASS_HANDLE) + // TODO-GDV: This can be simplified to just use likelyClasses and + // likelyMethods now that we have multiple candidates here. 
+ getRandomGDV(fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset, random, classGuess, methodGuess); + if (*classGuess != NO_CLASS_HANDLE) + { + JITDUMP("Picked random class for GDV: %p (%s)\n", *classGuess, eeGetClassName(*classGuess)); + return; + } + if (*methodGuess != NO_METHOD_HANDLE) { - numberOfClasses = 1; + JITDUMP("Picked random method for GDV: %p (%s)\n", *methodGuess, eeGetMethodFullName(*methodGuess)); + return; } } - else #endif + + // Prefer class guess as it is cheaper + if (numberOfClasses > 0) { - numberOfClasses = - getLikelyClasses(likelyClasses, maxLikelyClasses, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset); + unsigned likelihoodThreshold = isInterface ? 25 : 30; + if (likelyClasses[0].likelihood >= likelihoodThreshold) + { + *classGuess = (CORINFO_CLASS_HANDLE)likelyClasses[0].handle; + *likelihood = likelyClasses[0].likelihood; + return; + } + + JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n", + isInterface ? "interface" : "virtual", likelihoodThreshold); } - // For now we only use the most popular type + if (numberOfMethods > 0) + { + unsigned likelihoodThreshold = 30; + if (likelyMethods[0].likelihood >= likelihoodThreshold) + { + *methodGuess = (CORINFO_METHOD_HANDLE)likelyMethods[0].handle; + *likelihood = likelyMethods[0].likelihood; + return; + } - likelihood = likelyClasses[0].likelihood; - likelyClass = likelyClasses[0].clsHandle; + JITDUMP("Not guessing for method; likelihood is below %s call threshold %u\n", + call->IsDelegateInvoke() ? "delegate" : "virtual", likelihoodThreshold); + } +} - if (numberOfClasses < 1) +//------------------------------------------------------------------------ +// isCompatibleMethodGDV: +// Check if devirtualizing a call node as a specified target method call is +// reasonable. +// +// Arguments: +// call - the call +// gdvTarget - the target method that we want to guess for and devirtualize to +// +// Returns: +// true if we can proceed with GDV. +// +// Notes: +// This implements a small simplified signature-compatibility check to +// verify that a guess is reasonable. The main goal here is to avoid blowing +// up the JIT on PGO data with stale GDV candidates; if they are not +// compatible in the ECMA sense then we do not expect the guard to ever pass +// at runtime, so we can get by with simplified rules here. 
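// Editorial illustration (not part of this change) of the mismatch the check
// below guards against: a stale dynamic profile may record a target whose
// signature no longer lines up with the call site, e.g.
//
//     call site:        obj.M(42)                 // one int argument
//     recorded target:  void C::M(string, string) // two string parameters
//
// The parameter-count and implicit-coercion checks in isCompatibleMethodGDV
// reject such a guess up front instead of letting inlining or call morphing
// assert on it later.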
+// +bool Compiler::isCompatibleMethodGDV(GenTreeCall* call, CORINFO_METHOD_HANDLE gdvTarget) +{ + CORINFO_SIG_INFO sig; + info.compCompHnd->getMethodSig(gdvTarget, &sig); + + CORINFO_ARG_LIST_HANDLE sigParam = sig.args; + unsigned numParams = sig.numArgs; + unsigned numArgs = 0; + for (CallArg& arg : call->gtArgs.Args()) { - JITDUMP("No likely class, sorry\n"); - return; - } + switch (arg.GetWellKnownArg()) + { + case WellKnownArg::RetBuffer: + case WellKnownArg::ThisPointer: + // Not part of signature but we still expect to see it here + continue; + case WellKnownArg::None: + break; + default: + assert(!"Unexpected well known arg to method GDV candidate"); + continue; + } + + numArgs++; + if (numArgs > numParams) + { + JITDUMP("Incompatible method GDV: call [%06u] has more arguments than signature (sig has %d parameters)\n", + dspTreeID(call), numParams); + return false; + } + + CORINFO_CLASS_HANDLE classHnd = NO_CLASS_HANDLE; + CorInfoType corType = strip(info.compCompHnd->getArgType(&sig, sigParam, &classHnd)); + var_types sigType = JITtype2varType(corType); + + if (!impCheckImplicitArgumentCoercion(sigType, arg.GetNode()->TypeGet())) + { + JITDUMP("Incompatible method GDV: arg [%06u] is type-incompatible with signature of target\n", + dspTreeID(arg.GetNode())); + return false; + } - assert(likelyClass != NO_CLASS_HANDLE); + // Best-effort check for struct compatibility here. + if (varTypeIsStruct(sigType) && (arg.GetSignatureClassHandle() != classHnd)) + { + ClassLayout* callLayout = typGetObjLayout(arg.GetSignatureClassHandle()); + ClassLayout* tarLayout = typGetObjLayout(classHnd); - // Print all likely classes - JITDUMP("%s classes for %p (%s):\n", doRandomDevirt ? "Random" : "Likely", dspPtr(objClass), objClassName) - for (UINT32 i = 0; i < numberOfClasses; i++) + if (!ClassLayout::AreCompatible(callLayout, tarLayout)) + { + JITDUMP("Incompatible method GDV: struct arg [%06u] is layout-incompatible with signature of target\n", + dspTreeID(arg.GetNode())); + return false; + } + } + + sigParam = info.compCompHnd->getArgNext(sigParam); + } + + if (numArgs < numParams) { - JITDUMP(" %u) %p (%s) [likelihood:%u%%]\n", i + 1, likelyClasses[i].clsHandle, - eeGetClassName(likelyClasses[i].clsHandle), likelyClasses[i].likelihood); + JITDUMP("Incompatible method GDV: call [%06u] has fewer arguments (%d) than signature (%d)\n", dspTreeID(call), + numArgs, numParams); + return false; } - // Todo: a more advanced heuristic using likelihood, number of - // classes, and the profile count for this block. - // - // For now we will guess if the likelihood is at least 25%/30% (intfc/virt), as studies - // have shown this transformation should pay off even if we guess wrong sometimes. + return true; +} + +//------------------------------------------------------------------------ +// considerGuardedDevirtualization: see if we can profitably guess at the +// class involved in an interface or virtual call. +// +// Arguments: +// +// call - potential guarded devirtualization candidate +// ilOffset - IL ofset of the call instruction +// baseMethod - target method of the call +// baseClass - class that introduced the target method +// pContextHandle - context handle for the call +// +// Notes: +// Consults with VM to see if there's a likely class at runtime, +// if so, adds a candidate for guarded devirtualization. 
+// +void Compiler::considerGuardedDevirtualization(GenTreeCall* call, + IL_OFFSET ilOffset, + bool isInterface, + CORINFO_METHOD_HANDLE baseMethod, + CORINFO_CLASS_HANDLE baseClass, + CORINFO_CONTEXT_HANDLE* pContextHandle) +{ + JITDUMP("Considering guarded devirtualization at IL offset %u (0x%x)\n", ilOffset, ilOffset); + + // We currently only get likely class guesses when there is PGO data + // with class profiles. // - if (likelihood < likelihoodThreshold) + if ((fgPgoClassProfiles == 0) && (fgPgoMethodProfiles == 0)) { - JITDUMP("Not guessing for class; likelihood is below %s call threshold %u\n", callKind, likelihoodThreshold); + JITDUMP("Not guessing for class or method: no GDV profile pgo data, or pgo disabled\n"); return; } - uint32_t const likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass); + CORINFO_CLASS_HANDLE likelyClass; + CORINFO_METHOD_HANDLE likelyMethod; + unsigned likelihood; + pickGDV(call, ilOffset, isInterface, &likelyClass, &likelyMethod, &likelihood); - if ((likelyClassAttribs & CORINFO_FLG_ABSTRACT) != 0) + if ((likelyClass == NO_CLASS_HANDLE) && (likelyMethod == NO_METHOD_HANDLE)) { - // We may see an abstract likely class, if we have a stale profile. - // No point guessing for this. - // - JITDUMP("Not guessing for class; abstract (stale profile)\n"); return; } - // Figure out which method will be called. - // - CORINFO_DEVIRTUALIZATION_INFO dvInfo; - dvInfo.virtualMethod = baseMethod; - dvInfo.objClass = likelyClass; - dvInfo.context = *pContextHandle; - dvInfo.exactContext = *pContextHandle; - dvInfo.pResolvedTokenVirtualMethod = nullptr; + uint32_t likelyClassAttribs = 0; + if (likelyClass != NO_CLASS_HANDLE) + { + likelyClassAttribs = info.compCompHnd->getClassAttribs(likelyClass); - const bool canResolve = info.compCompHnd->resolveVirtualMethod(&dvInfo); + if ((likelyClassAttribs & CORINFO_FLG_ABSTRACT) != 0) + { + // We may see an abstract likely class, if we have a stale profile. + // No point guessing for this. + // + JITDUMP("Not guessing for class; abstract (stale profile)\n"); + return; + } + + // Figure out which method will be called. + // + CORINFO_DEVIRTUALIZATION_INFO dvInfo; + dvInfo.virtualMethod = baseMethod; + dvInfo.objClass = likelyClass; + dvInfo.context = *pContextHandle; + dvInfo.exactContext = *pContextHandle; + dvInfo.pResolvedTokenVirtualMethod = nullptr; + + const bool canResolve = info.compCompHnd->resolveVirtualMethod(&dvInfo); + + if (!canResolve) + { + JITDUMP("Can't figure out which method would be invoked, sorry\n"); + return; + } - if (!canResolve) + likelyMethod = dvInfo.devirtualizedMethod; + } + + uint32_t likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod); + + if (likelyClass == NO_CLASS_HANDLE) { - JITDUMP("Can't figure out which method would be invoked, sorry\n"); - return; + // For method GDV do a few more checks that we get for free in the + // resolve call above for class-based GDV. + if ((likelyMethodAttribs & CORINFO_FLG_STATIC) != 0) + { + assert(call->IsDelegateInvoke()); + JITDUMP("Cannot currently handle devirtualizing static delegate calls, sorry\n"); + return; + } + + // Verify that the call target and args look reasonable so that the JIT + // does not blow up during inlining/call morphing. + // + // NOTE: Once we want to support devirtualization of delegate calls to + // static methods and remove the check above we will start failing here + // for delegates pointing to static methods that have the first arg + // bound. 
For example: + // + // public static void E(this C c) ... + // Action a = new C().E; + // + // The delegate instance looks exactly like one pointing to an instance + // method in this case and the call will have zero args while the + // signature has 1 arg. + // + if (!isCompatibleMethodGDV(call, likelyMethod)) + { + JITDUMP("Target for method-based GDV is incompatible (stale profile?)\n"); + assert((fgPgoSource != ICorJitInfo::PgoSource::Dynamic) && "Unexpected stale profile in dynamic PGO data"); + return; + } } - CORINFO_METHOD_HANDLE likelyMethod = dvInfo.devirtualizedMethod; - JITDUMP("%s call would invoke method %s\n", callKind, eeGetMethodName(likelyMethod, nullptr)); + JITDUMP("%s call would invoke method %s\n", + isInterface ? "interface" : call->IsDelegateInvoke() ? "delegate" : "virtual", + eeGetMethodName(likelyMethod, nullptr)); // Add this as a potential candidate. // - uint32_t const likelyMethodAttribs = info.compCompHnd->getMethodAttribs(likelyMethod); addGuardedDevirtualizationCandidate(call, likelyMethod, likelyClass, likelyMethodAttribs, likelyClassAttribs, likelihood); } @@ -22404,8 +22709,8 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, unsigned classAttr, unsigned likelihood) { - // This transformation only makes sense for virtual calls - assert(call->IsVirtual()); + // This transformation only makes sense for delegate and virtual calls + assert(call->IsDelegateInvoke() || call->IsVirtual()); // Only mark calls if the feature is enabled. const bool isEnabled = JitConfig.JitEnableGuardedDevirtualization() > 0; @@ -22455,8 +22760,9 @@ void Compiler::addGuardedDevirtualizationCandidate(GenTreeCall* call, // We're all set, proceed with candidate creation. // - JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for class %s\n", dspTreeID(call), - eeGetClassName(classHandle)); + JITDUMP("Marking call [%06u] as guarded devirtualization candidate; will guess for %s %s\n", dspTreeID(call), + classHandle != NO_CLASS_HANDLE ? "class" : "method", + classHandle != NO_CLASS_HANDLE ? eeGetClassName(classHandle) : eeGetMethodFullName(methodHandle)); setMethodHasGuardedDevirtualization(); call->SetGuardedDevirtualizationCandidate(); diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 14242b8f718b90..e3d799f734b8ba 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -450,9 +450,11 @@ class IndirectCallTransformer class GuardedDevirtualizationTransformer final : public Transformer { + unsigned m_targetLclNum; + public: GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) - : Transformer(compiler, block, stmt), returnTemp(BAD_VAR_NUM) + : Transformer(compiler, block, stmt), m_targetLclNum(BAD_VAR_NUM), returnTemp(BAD_VAR_NUM) { } @@ -538,23 +540,26 @@ class IndirectCallTransformer checkBlock = currBlock; checkBlock->bbJumpKind = BBJ_COND; - // Fetch method table from object arg to call. - GenTree* thisTree = compiler->gtCloneExpr(origCall->gtArgs.GetThisArg()->GetNode()); + CallArg* thisArg = origCall->gtArgs.GetThisArg(); + GenTree* thisTree = thisArg->GetNode(); // Create temp for this if the tree is costly. 
- if (!thisTree->IsLocal()) + if (thisTree->IsLocal()) + { + thisTree = compiler->gtCloneExpr(thisTree); + } + else { const unsigned thisTempNum = compiler->lvaGrabTemp(true DEBUGARG("guarded devirt this temp")); - // lvaSetClass(thisTempNum, ...); - GenTree* asgTree = compiler->gtNewTempAssign(thisTempNum, thisTree); - Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); + GenTree* asgTree = compiler->gtNewTempAssign(thisTempNum, thisTree); + Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); thisTree = compiler->gtNewLclvNode(thisTempNum, TYP_REF); // Propagate the new this to the call. Must be a new expr as the call // will live on in the else block and thisTree is used below. - origCall->gtArgs.GetThisArg()->SetEarlyNode(compiler->gtNewLclvNode(thisTempNum, TYP_REF)); + thisArg->SetEarlyNode(compiler->gtNewLclvNode(thisTempNum, TYP_REF)); } // Remember the current last statement. If we're doing a chained GDV, we'll clone/copy @@ -565,18 +570,96 @@ class IndirectCallTransformer // lastStmt = checkBlock->lastStmt(); - // Find target method table - // - GenTree* methodTable = compiler->gtNewMethodTableLookup(thisTree); - GuardedDevirtualizationCandidateInfo* guardedInfo = origCall->gtGuardedDevirtualizationCandidateInfo; - CORINFO_CLASS_HANDLE clsHnd = guardedInfo->guardedClassHandle; - GenTree* targetMethodTable = compiler->gtNewIconEmbClsHndNode(clsHnd); + GuardedDevirtualizationCandidateInfo* guardedInfo = origCall->gtGuardedDevirtualizationCandidateInfo; - // Compare and jump to else (which does the indirect call) if NOT equal - // - GenTree* methodTableCompare = compiler->gtNewOperNode(GT_NE, TYP_INT, targetMethodTable, methodTable); - GenTree* jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, methodTableCompare); - Statement* jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo()); + // Create comparison. On success we will jump to do the indirect call. + GenTree* compare; + if (guardedInfo->guardedClassHandle != NO_CLASS_HANDLE) + { + // Find target method table + // + GenTree* methodTable = compiler->gtNewMethodTableLookup(thisTree); + CORINFO_CLASS_HANDLE clsHnd = guardedInfo->guardedClassHandle; + GenTree* targetMethodTable = compiler->gtNewIconEmbClsHndNode(clsHnd); + + compare = compiler->gtNewOperNode(GT_NE, TYP_INT, targetMethodTable, methodTable); + } + else + { + assert(origCall->IsVirtualVtable() || origCall->IsDelegateInvoke()); + // We reuse the target except if this is a chained GDV, in + // which case the check will be moved into the success case of + // a previous GDV and thus may not execute when we hit the cold + // path. + // TODO-GDV: Consider duplicating the store at the end of the + // cold case for the previous GDV. Then we can reuse the target + // if the second check of a chained GDV fails. 
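// Editorial sketch (not part of the diff) of the block shape this transformer
// emits for a method-based guess:
//
//     checkBlock: tmp = <load actual target: vtable slot or delegate method ptr>
//                 if (tmp != <address of guessed method>) goto elseBlock
//     thenBlock:  direct call to the guessed method ('this' unwrapped from the
//                 delegate when needed); this call is the inline candidate
//     elseBlock:  original indirect call, reusing tmp where possible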
+ bool reuseTarget = (origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0; + if (origCall->IsVirtualVtable()) + { + GenTree* tarTree = compiler->fgExpandVirtualVtableCallTarget(origCall); + + if (reuseTarget) + { + m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); + + GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); + Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); + compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); + + tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); + } + + CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; + CORINFO_CONST_LOOKUP lookup; + compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &lookup); + + GenTree* compareTarTree = CreateTreeForLookup(methHnd, lookup); + compare = compiler->gtNewOperNode(GT_NE, TYP_INT, compareTarTree, tarTree); + } + else + { + // Reusing the call target for delegates is more + // complicated. Essentially we need to do the + // transformation done in LowerDelegateInvoke by converting + // the call to CT_INDIRECT and reusing the target address. + // We will do that transformation in CreateElse, but here + // we need to stash the target. + CLANG_FORMAT_COMMENT_ANCHOR; +#ifdef TARGET_ARM + // Not impossible to support, but would additionally + // require us to load the wrapper delegate cell when + // expanding. + reuseTarget &= (origCall->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) == 0; +#endif + + GenTree* offset = + compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateFirstTarget, + TYP_I_IMPL); + GenTree* tarTree = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, thisTree, offset); + tarTree = compiler->gtNewIndir(TYP_I_IMPL, tarTree); + + if (reuseTarget) + { + m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); + + GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); + Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); + compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); + tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); + } + + CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; + CORINFO_CONST_LOOKUP lookup; + compiler->info.compCompHnd->getFunctionFixedEntryPoint(methHnd, false, &lookup); + + GenTree* compareTarTree = CreateTreeForLookup(methHnd, lookup); + compare = compiler->gtNewOperNode(GT_NE, TYP_INT, compareTarTree, tarTree); + } + } + + GenTree* jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, compare); + Statement* jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo()); compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt); } @@ -682,35 +765,94 @@ class IndirectCallTransformer InlineCandidateInfo* inlineInfo = origCall->gtInlineCandidateInfo; CORINFO_CLASS_HANDLE clsHnd = inlineInfo->guardedClassHandle; - // copy 'this' to temp with exact type. + // + // Copy the 'this' for the devirtualized call to a new temp. For + // class-based GDV this will allow us to set the exact type on that + // temp. For delegate GDV, this will be the actual 'this' object + // stored in the delegate. 
+ // const unsigned thisTemp = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt this exact temp")); GenTree* clonedObj = compiler->gtCloneExpr(origCall->gtArgs.GetThisArg()->GetNode()); - GenTree* assign = compiler->gtNewTempAssign(thisTemp, clonedObj); - compiler->lvaSetClass(thisTemp, clsHnd, true); + GenTree* newThisObj; + if (origCall->IsDelegateInvoke()) + { + GenTree* offset = + compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL); + newThisObj = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, clonedObj, offset); + newThisObj = compiler->gtNewIndir(TYP_REF, newThisObj); + } + else + { + newThisObj = clonedObj; + } + GenTree* assign = compiler->gtNewTempAssign(thisTemp, newThisObj); + + if (clsHnd != NO_CLASS_HANDLE) + { + compiler->lvaSetClass(thisTemp, clsHnd, true); + } + else + { + compiler->lvaSetClass(thisTemp, + compiler->info.compCompHnd->getMethodClass(inlineInfo->guardedMethodHandle)); + } + compiler->fgNewStmtAtEnd(thenBlock, assign); - // Clone call. Note we must use the special candidate helper. + // Clone call for the devirtualized case. Note we must use the + // special candidate helper and we need to use the new 'this'. GenTreeCall* call = compiler->gtCloneCandidateCall(origCall); call->gtArgs.GetThisArg()->SetEarlyNode(compiler->gtNewLclvNode(thisTemp, TYP_REF)); call->SetIsGuarded(); JITDUMP("Direct call [%06u] in block " FMT_BB "\n", compiler->dspTreeID(call), thenBlock->bbNum); - // Then invoke impDevirtualizeCall to actually transform the call for us, - // given the original (base) method and the exact guarded class. It should succeed. - // - CORINFO_METHOD_HANDLE methodHnd = call->gtCallMethHnd; - unsigned methodFlags = compiler->info.compCompHnd->getMethodAttribs(methodHnd); - CORINFO_CONTEXT_HANDLE context = inlineInfo->exactContextHnd; - const bool isLateDevirtualization = true; - const bool explicitTailCall = (call->AsCall()->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0; - compiler->impDevirtualizeCall(call, nullptr, &methodHnd, &methodFlags, &context, nullptr, - isLateDevirtualization, explicitTailCall); + CORINFO_METHOD_HANDLE methodHnd = call->gtCallMethHnd; + CORINFO_CONTEXT_HANDLE context = inlineInfo->exactContextHnd; + if (clsHnd != NO_CLASS_HANDLE) + { + // Then invoke impDevirtualizeCall to actually transform the call for us, + // given the original (base) method and the exact guarded class. It should succeed. + // + unsigned methodFlags = compiler->info.compCompHnd->getMethodAttribs(methodHnd); + const bool isLateDevirtualization = true; + const bool explicitTailCall = (call->AsCall()->gtCallMoreFlags & GTF_CALL_M_EXPLICIT_TAILCALL) != 0; + compiler->impDevirtualizeCall(call, nullptr, &methodHnd, &methodFlags, &context, nullptr, + isLateDevirtualization, explicitTailCall); + } + else + { + // Otherwise we know the exact method already, so just change + // the call as necessary here. + call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK; + call->gtCallMethHnd = methodHnd = inlineInfo->guardedMethodHandle; + call->gtCallType = CT_USER_FUNC; + call->gtCallMoreFlags |= GTF_CALL_M_DEVIRTUALIZED; + call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV; + // TODO-GDV: To support R2R we need to get the entry point + // here. We should unify with the tail of impDevirtualizeCall. + + if (origCall->IsVirtual()) + { + // Virtual calls include an implicit null check, which we may + // now need to make explicit. 
+ bool isExact; + bool objIsNonNull; + compiler->gtGetClassHandle(newThisObj, &isExact, &objIsNonNull); + + if (!objIsNonNull) + { + call->gtFlags |= GTF_CALL_NULLCHECK; + } + } + + context = MAKE_METHODCONTEXT(methodHnd); + } // We know this call can devirtualize or we would not have set up GDV here. - // So impDevirtualizeCall should succeed in devirtualizing. + // So above code should succeed in devirtualizing. // - assert(!call->IsVirtual()); + assert(!call->IsVirtual() && !call->IsDelegateInvoke()); // If the devirtualizer was unable to transform the call to invoke the unboxed entry, the inline info // we set up may be invalid. We won't be able to inline anyways. So demote the call as an inline candidate. @@ -776,7 +918,7 @@ class IndirectCallTransformer } //------------------------------------------------------------------------ - // CreateElse: create else block. This executes the unaltered indirect call. + // CreateElse: create else block. This executes the original indirect call. // virtual void CreateElse() { @@ -796,6 +938,38 @@ class IndirectCallTransformer newStmt->SetRootNode(assign); } + if (m_targetLclNum != BAD_VAR_NUM) + { + if (call->IsVirtualVtable()) + { + // We already loaded the target once for the check, so reuse it from the temp. + call->gtControlExpr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); + call->SetExpandedEarly(); + } + else if (call->IsDelegateInvoke()) + { + // Target was saved into a temp during check. We expand the + // delegate call to a CT_INDIRECT call that uses the target + // directly, somewhat similarly to LowerDelegateInvoke. + call->gtCallType = CT_INDIRECT; + call->gtCallAddr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); + call->gtCallCookie = nullptr; + call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV; + + GenTree* thisOffset = + compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL); + CallArg* thisArg = call->gtArgs.GetThisArg(); + GenTree* delegateObj = thisArg->GetNode(); + + assert(delegateObj->OperIsLocal()); + GenTree* newThis = + compiler->gtNewOperNode(GT_ADD, TYP_BYREF, compiler->gtCloneExpr(delegateObj), thisOffset); + newThis = compiler->gtNewIndir(TYP_REF, newThis); + + thisArg->SetEarlyNode(newThis); + } + } + compiler->fgInsertStmtAtEnd(elseBlock, newStmt); // Set the original statement to a nop. @@ -1005,6 +1179,62 @@ class IndirectCallTransformer private: unsigned returnTemp; Statement* lastStmt; + + //------------------------------------------------------------------------ + // CreateTreeForLookup: Create a tree representing a lookup of a method address. + // + // Arguments: + // methHnd - the handle for the method the lookup is for + // lookup - lookup information for the address + // + // Returns: + // A node representing the lookup. 
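// Editorial note (not part of the diff) on the lookup kinds handled below:
// IAT_VALUE is the function address itself; IAT_PVALUE is the address of a
// cell holding the function address, so one invariant, non-faulting
// indirection is emitted; IAT_RELPVALUE is a cell holding an offset relative
// to the cell itself, so the final address is *cell + cell, which is what the
// GT_ADD case builds. IAT_PPVALUE is not expected here and asserts.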
+ // + GenTree* CreateTreeForLookup(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup) + { + switch (lookup.accessType) + { + case IAT_VALUE: + { + return CreateFunctionTargetAddr(methHnd, lookup); + } + case IAT_PVALUE: + { + GenTree* tree = CreateFunctionTargetAddr(methHnd, lookup); + tree = compiler->gtNewIndir(TYP_I_IMPL, tree); + tree->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT; + tree->gtFlags &= ~GTF_EXCEPT; + return tree; + } + case IAT_PPVALUE: + { + noway_assert(!"Unexpected IAT_PPVALUE"); + return nullptr; + } + case IAT_RELPVALUE: + { + GenTree* addr = CreateFunctionTargetAddr(methHnd, lookup); + GenTree* tree = CreateFunctionTargetAddr(methHnd, lookup); + tree = compiler->gtNewIndir(TYP_I_IMPL, tree); + tree->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT; + tree->gtFlags &= ~GTF_EXCEPT; + tree = compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, tree, addr); + return tree; + } + default: + { + noway_assert(!"Bad accessType"); + return nullptr; + } + } + } + + GenTree* CreateFunctionTargetAddr(CORINFO_METHOD_HANDLE methHnd, const CORINFO_CONST_LOOKUP& lookup) + { + GenTree* con = compiler->gtNewIconHandleNode((size_t)lookup.addr, GTF_ICON_FTN_ADDR); + INDEBUG(con->AsIntCon()->gtTargetHandle = (size_t)methHnd); + return con; + } }; // Runtime lookup with dynamic dictionary expansion transformer, diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 763f5bc382c46b..f21a77da4fa147 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -577,10 +577,10 @@ class InlineResult bool m_reportFailureAsVmFailure; }; -// ClassProfileCandidateInfo provides information about +// HandleHistogramProfileCandidateInfo provides information about // profiling an indirect or virtual call. // -struct ClassProfileCandidateInfo +struct HandleHistogramProfileCandidateInfo { IL_OFFSET ilOffset; unsigned probeIndex; @@ -589,7 +589,7 @@ struct ClassProfileCandidateInfo // GuardedDevirtualizationCandidateInfo provides information about // a potential target of a virtual or interface call. 
// -struct GuardedDevirtualizationCandidateInfo : ClassProfileCandidateInfo +struct GuardedDevirtualizationCandidateInfo : HandleHistogramProfileCandidateInfo { CORINFO_CLASS_HANDLE guardedClassHandle; CORINFO_METHOD_HANDLE guardedMethodHandle; diff --git a/src/coreclr/jit/jit.h b/src/coreclr/jit/jit.h index cf0e7996500c55..110c079ee58c40 100644 --- a/src/coreclr/jit/jit.h +++ b/src/coreclr/jit/jit.h @@ -327,8 +327,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX typedef class ICorJitInfo* COMP_HANDLE; -const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = nullptr; -const CORINFO_FIELD_HANDLE NO_FIELD_HANDLE = nullptr; +const CORINFO_CLASS_HANDLE NO_CLASS_HANDLE = nullptr; +const CORINFO_FIELD_HANDLE NO_FIELD_HANDLE = nullptr; +const CORINFO_METHOD_HANDLE NO_METHOD_HANDLE = nullptr; /*****************************************************************************/ @@ -839,19 +840,26 @@ T dspOffset(T o) #endif // !defined(DEBUG) -struct LikelyClassRecord +struct LikelyClassMethodRecord { - CORINFO_CLASS_HANDLE clsHandle; - UINT32 likelihood; + intptr_t handle; + UINT32 likelihood; }; -extern "C" UINT32 WINAPI getLikelyClasses(LikelyClassRecord* pLikelyClasses, +extern "C" UINT32 WINAPI getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, UINT32 maxLikelyClasses, ICorJitInfo::PgoInstrumentationSchema* schema, UINT32 countSchemaItems, BYTE* pInstrumentationData, int32_t ilOffset); +extern "C" UINT32 WINAPI getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, + UINT32 maxLikelyMethods, + ICorJitInfo::PgoInstrumentationSchema* schema, + UINT32 countSchemaItems, + BYTE* pInstrumentationData, + int32_t ilOffset); + /*****************************************************************************/ #endif //_JIT_H_ /*****************************************************************************/ diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 569b030b95a84f..aa562c20b5e6a7 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -552,6 +552,8 @@ CONFIG_INTEGER(JitConsumeProfileForCasts, W("JitConsumeProfileForCasts"), 1) // // castclass/isinst CONFIG_INTEGER(JitClassProfiling, W("JitClassProfiling"), 1) // Profile virtual and interface calls +CONFIG_INTEGER(JitDelegateProfiling, W("JitDelegateProfiling"), 1) // Profile resolved delegate call targets +CONFIG_INTEGER(JitVTableProfiling, W("JitVTableProfiling"), 0) // Profile resolved vtable call targets CONFIG_INTEGER(JitEdgeProfiling, W("JitEdgeProfiling"), 1) // Profile edges instead of blocks CONFIG_INTEGER(JitCollect64BitCounts, W("JitCollect64BitCounts"), 0) // Collect counts as 64-bit values. 
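The two new knobs default to delegate profiling on and vtable profiling off; like other JIT config values they can be flipped at run time via the usual COMPlus_/DOTNET_ environment variable prefixes. As a rough, editorial sketch of how the JIT consumes the resulting data (mirroring the pickGDV changes above, and assuming the JIT's own types and Compiler fields from this change are in scope), a caller of the new getLikelyMethods export looks roughly like this:

    // Sketch only: LikelyClassMethodRecord, getLikelyMethods and the fgPgo*
    // fields come from this change; 30 is the method-GDV likelihood threshold
    // used by pickGDV.
    LikelyClassMethodRecord likely[8] = {};
    unsigned count = getLikelyMethods(likely, 8, fgPgoSchema, fgPgoSchemaCount, fgPgoData, ilOffset);

    if ((count > 0) && (likely[0].likelihood >= 30))
    {
        // The most frequently observed target becomes the guarded devirtualization guess.
        CORINFO_METHOD_HANDLE guess = (CORINFO_METHOD_HANDLE)likely[0].handle;
        // ... set up the GDV candidate against 'guess'
    }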
diff --git a/src/coreclr/jit/likelyclass.cpp b/src/coreclr/jit/likelyclass.cpp index 632c9ce8b847b9..277d38201448de 100644 --- a/src/coreclr/jit/likelyclass.cpp +++ b/src/coreclr/jit/likelyclass.cpp @@ -26,45 +26,45 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Data item in class profile histogram // -struct LikelyClassHistogramEntry +struct LikelyClassMethodHistogramEntry { - // Class that was observed at runtime - INT_PTR m_mt; // This may be an "unknown type handle" + // Handle that was observed at runtime + INT_PTR m_handle; // This may be an "unknown handle" // Number of observations in the table unsigned m_count; }; // Summarizes a ClassProfile table by forming a Histogram // -struct LikelyClassHistogram +struct LikelyClassMethodHistogram { - LikelyClassHistogram(INT_PTR* histogramEntries, unsigned entryCount); + LikelyClassMethodHistogram(INT_PTR* histogramEntries, unsigned entryCount); // Sum of counts from all entries in the histogram. This includes "unknown" entries which are not captured in // m_histogram unsigned m_totalCount; - // Rough guess at count of unknown types - unsigned m_unknownTypes; + // Rough guess at count of unknown handles + unsigned m_unknownHandles; // Histogram entries, in no particular order. - LikelyClassHistogramEntry m_histogram[HISTOGRAM_MAX_SIZE_COUNT]; - UINT32 countHistogramElements = 0; + LikelyClassMethodHistogramEntry m_histogram[HISTOGRAM_MAX_SIZE_COUNT]; + UINT32 countHistogramElements = 0; - LikelyClassHistogramEntry HistogramEntryAt(unsigned index) + LikelyClassMethodHistogramEntry HistogramEntryAt(unsigned index) { return m_histogram[index]; } }; //------------------------------------------------------------------------ -// LikelyClassHistogram::LikelyClassHistgram: construct a new histogram +// LikelyClassMethodHistogram::LikelyClassMethodHistgram: construct a new histogram // // Arguments: // histogramEntries - pointer to the table portion of a ClassProfile* object (see corjit.h) // entryCount - number of entries in the table to examine // -LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned entryCount) +LikelyClassMethodHistogram::LikelyClassMethodHistogram(INT_PTR* histogramEntries, unsigned entryCount) { - m_unknownTypes = 0; + m_unknownHandles = 0; m_totalCount = 0; uint32_t unknownTypeHandleMask = 0; @@ -83,7 +83,7 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e unsigned h = 0; for (; h < countHistogramElements; h++) { - if (m_histogram[h].m_mt == currentEntry) + if (m_histogram[h].m_handle == currentEntry) { m_histogram[h].m_count++; found = true; @@ -97,8 +97,8 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e { continue; } - LikelyClassHistogramEntry newEntry; - newEntry.m_mt = currentEntry; + LikelyClassMethodHistogramEntry newEntry; + newEntry.m_handle = currentEntry; newEntry.m_count = 1; m_histogram[countHistogramElements++] = newEntry; } @@ -106,42 +106,28 @@ LikelyClassHistogram::LikelyClassHistogram(INT_PTR* histogramEntries, unsigned e } //------------------------------------------------------------------------ -// getLikelyClasses: find class profile data for an IL offset, and return the most likely classes +// getLikelyClassesOrMethods: +// Find class/method profile data for an IL offset, and return the most +// likely classes/methods. // -// Arguments: -// pLikelyClasses - [OUT] array of likely classes sorted by likelihood (descending). 
It must be -// at least of 'maxLikelyClasses' (next argument) length. -// The array consists of pairs "clsHandle - likelihood" ordered by likelihood -// (descending) where likelihood can be any value in [0..100] range. clsHandle -// is never null for [0..) range, Items in -// [..maxLikelyClasses) are zeroed if the number -// of classes seen is less than maxLikelyClasses provided. -// maxLikelyClasses - limit for likely classes to output -// schema - profile schema -// countSchemaItems - number of items in the schema -// pInstrumentationData - associated data -// ilOffset - il offset of the callvirt +// This is a common entrypoint for getLikelyClasses and getLikelyMethods. +// See documentation for those for more information. // -// Returns: -// Estimated number of classes seen at runtime -// -// Notes: -// A "monomorphic" call site will return likelihood 100 and number of entries = 1. -// -// This is used by the devirtualization logic below, and by crossgen2 when producing -// the R2R image (to reduce the sizecost of carrying the type histogram) -// -// This code can runs without a jit instance present, so JITDUMP and related -// cannot be used. -// -extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* pLikelyClasses, - UINT32 maxLikelyClasses, - ICorJitInfo::PgoInstrumentationSchema* schema, - UINT32 countSchemaItems, - BYTE* pInstrumentationData, - int32_t ilOffset) +static unsigned getLikelyClassesOrMethods(LikelyClassMethodRecord* pLikelyEntries, + UINT32 maxLikelyClasses, + ICorJitInfo::PgoInstrumentationSchema* schema, + UINT32 countSchemaItems, + BYTE* pInstrumentationData, + int32_t ilOffset, + bool types) { - ZeroMemory(pLikelyClasses, maxLikelyClasses * sizeof(*pLikelyClasses)); + ICorJitInfo::PgoInstrumentationKind histogramKind = + types ? ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes + : ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods; + ICorJitInfo::PgoInstrumentationKind compressedKind = types ? 
ICorJitInfo::PgoInstrumentationKind::GetLikelyClass + : ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod; + + memset(pLikelyEntries, 0, maxLikelyClasses * sizeof(*pLikelyEntries)); if (schema == nullptr) { @@ -153,17 +139,16 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* if (schema[i].ILOffset != ilOffset) continue; - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) && - (schema[i].Count == 1)) + if ((schema[i].InstrumentationKind == compressedKind) && (schema[i].Count == 1)) { - INT_PTR result = *(INT_PTR*)(pInstrumentationData + schema[i].Offset); + intptr_t result = *(intptr_t*)(pInstrumentationData + schema[i].Offset); if (ICorJitInfo::IsUnknownHandle(result)) { return 0; } - assert(result != 0); // we don't expect zero in GetLikelyClass - pLikelyClasses[0].likelihood = (UINT32)(schema[i].Other & 0xFF); - pLikelyClasses[0].clsHandle = (CORINFO_CLASS_HANDLE)result; + assert(result != 0); // we don't expect zero in GetLikelyClass/GetLikelyMethod + pLikelyEntries[0].likelihood = (UINT32)(schema[i].Other & 0xFF); + pLikelyEntries[0].handle = result; return 1; } @@ -172,11 +157,11 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount); if (isHistogramCount && (schema[i].Count == 1) && ((i + 1) < countSchemaItems) && - (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes)) + (schema[i + 1].InstrumentationKind == histogramKind)) { // Form a histogram // - LikelyClassHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count); + LikelyClassMethodHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count); // Use histogram count as number of classes estimate // Report back what we've learned @@ -189,45 +174,45 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* case 1: { - LikelyClassHistogramEntry const hist0 = h.HistogramEntryAt(0); + LikelyClassMethodHistogramEntry const hist0 = h.HistogramEntryAt(0); // Fast path for monomorphic cases - if (ICorJitInfo::IsUnknownHandle(hist0.m_mt)) + if (ICorJitInfo::IsUnknownHandle(hist0.m_handle)) { return 0; } - pLikelyClasses[0].likelihood = 100; - pLikelyClasses[0].clsHandle = (CORINFO_CLASS_HANDLE)hist0.m_mt; + pLikelyEntries[0].likelihood = 100; + pLikelyEntries[0].handle = hist0.m_handle; return 1; } case 2: { - LikelyClassHistogramEntry const hist0 = h.HistogramEntryAt(0); - LikelyClassHistogramEntry const hist1 = h.HistogramEntryAt(1); // Fast path for two classes - if ((hist0.m_count >= hist1.m_count) && !ICorJitInfo::IsUnknownHandle(hist0.m_mt)) + LikelyClassMethodHistogramEntry const hist0 = h.HistogramEntryAt(0); + LikelyClassMethodHistogramEntry const hist1 = h.HistogramEntryAt(1); + if ((hist0.m_count >= hist1.m_count) && !ICorJitInfo::IsUnknownHandle(hist0.m_handle)) { - pLikelyClasses[0].likelihood = (100 * hist0.m_count) / h.m_totalCount; - pLikelyClasses[0].clsHandle = (CORINFO_CLASS_HANDLE)hist0.m_mt; + pLikelyEntries[0].likelihood = (100 * hist0.m_count) / h.m_totalCount; + pLikelyEntries[0].handle = hist0.m_handle; - if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist1.m_mt)) + if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist1.m_handle)) { - pLikelyClasses[1].likelihood = (100 * hist1.m_count) / h.m_totalCount; - pLikelyClasses[1].clsHandle = (CORINFO_CLASS_HANDLE)hist1.m_mt; + 
pLikelyEntries[1].likelihood = (100 * hist1.m_count) / h.m_totalCount; + pLikelyEntries[1].handle = hist1.m_handle; return 2; } return 1; } - if (!ICorJitInfo::IsUnknownHandle(hist1.m_mt)) + if (!ICorJitInfo::IsUnknownHandle(hist1.m_handle)) { - pLikelyClasses[0].likelihood = (100 * hist1.m_count) / h.m_totalCount; - pLikelyClasses[0].clsHandle = (CORINFO_CLASS_HANDLE)hist1.m_mt; + pLikelyEntries[0].likelihood = (100 * hist1.m_count) / h.m_totalCount; + pLikelyEntries[0].handle = hist1.m_handle; - if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist0.m_mt)) + if ((maxLikelyClasses > 1) && !ICorJitInfo::IsUnknownHandle(hist0.m_handle)) { - pLikelyClasses[1].likelihood = (100 * hist0.m_count) / h.m_totalCount; - pLikelyClasses[1].clsHandle = (CORINFO_CLASS_HANDLE)hist0.m_mt; + pLikelyEntries[1].likelihood = (100 * hist0.m_count) / h.m_totalCount; + pLikelyEntries[1].handle = hist0.m_handle; return 2; } return 1; @@ -237,14 +222,14 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* default: { - LikelyClassHistogramEntry sortedEntries[HISTOGRAM_MAX_SIZE_COUNT]; + LikelyClassMethodHistogramEntry sortedEntries[HISTOGRAM_MAX_SIZE_COUNT]; // Since this method can be invoked without a jit instance we can't use any existing allocators unsigned knownHandles = 0; for (unsigned m = 0; m < h.countHistogramElements; m++) { - LikelyClassHistogramEntry const hist = h.HistogramEntryAt(m); - if (!ICorJitInfo::IsUnknownHandle(hist.m_mt)) + LikelyClassMethodHistogramEntry const hist = h.HistogramEntryAt(m); + if (!ICorJitInfo::IsUnknownHandle(hist.m_handle)) { sortedEntries[knownHandles++] = hist; } @@ -252,7 +237,8 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* // sort by m_count (descending) jitstd::sort(sortedEntries, sortedEntries + knownHandles, - [](const LikelyClassHistogramEntry& h1, const LikelyClassHistogramEntry& h2) -> bool { + [](const LikelyClassMethodHistogramEntry& h1, + const LikelyClassMethodHistogramEntry& h2) -> bool { return h1.m_count > h2.m_count; }); @@ -260,9 +246,9 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* for (size_t hIdx = 0; hIdx < numberOfClasses; hIdx++) { - LikelyClassHistogramEntry const hc = sortedEntries[hIdx]; - pLikelyClasses[hIdx].clsHandle = (CORINFO_CLASS_HANDLE)hc.m_mt; - pLikelyClasses[hIdx].likelihood = hc.m_count * 100 / h.m_totalCount; + LikelyClassMethodHistogramEntry const hc = sortedEntries[hIdx]; + pLikelyEntries[hIdx].handle = hc.m_handle; + pLikelyEntries[hIdx].likelihood = hc.m_count * 100 / h.m_totalCount; } return numberOfClasses; } @@ -276,8 +262,64 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* } //------------------------------------------------------------------------ -// getRandomClass: find class profile data for an IL offset, and return -// one of the possible classes at random +// getLikelyClasses: find class profile data for an IL offset, and return the most likely classes +// +// Arguments: +// pLikelyClasses - [OUT] array of likely classes sorted by likelihood (descending). It must be +// at least of 'maxLikelyClasses' (next argument) length. +// The array consists of pairs "clsHandle - likelihood" ordered by likelihood +// (descending) where likelihood can be any value in [0..100] range. clsHandle +// is never null for [0..) range, Items in +// [..maxLikelyClasses) are zeroed if the number +// of classes seen is less than maxLikelyClasses provided. 
+// maxLikelyClasses - limit for likely classes to output +// schema - profile schema +// countSchemaItems - number of items in the schema +// pInstrumentationData - associated data +// ilOffset - il offset of the callvirt +// +// Returns: +// Estimated number of classes seen at runtime +// +// Notes: +// A "monomorphic" call site will return likelihood 100 and number of entries = 1. +// +// This is used by the devirtualization logic below, and by crossgen2 when producing +// the R2R image (to reduce the sizecost of carrying the type histogram) +// +// This code can runs without a jit instance present, so JITDUMP and related +// cannot be used. +// +extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, + UINT32 maxLikelyClasses, + ICorJitInfo::PgoInstrumentationSchema* schema, + UINT32 countSchemaItems, + BYTE* pInstrumentationData, + int32_t ilOffset) +{ + return getLikelyClassesOrMethods(pLikelyClasses, maxLikelyClasses, schema, countSchemaItems, pInstrumentationData, + ilOffset, true); +} + +//------------------------------------------------------------------------ +// getLikelyMethods: find method profile data for an IL offset, and return the most likely methods +// +// See documentation on getLikelyClasses above. +// +extern "C" DLLEXPORT UINT32 WINAPI getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, + UINT32 maxLikelyMethods, + ICorJitInfo::PgoInstrumentationSchema* schema, + UINT32 countSchemaItems, + BYTE* pInstrumentationData, + int32_t ilOffset) +{ + return getLikelyClassesOrMethods(pLikelyMethods, maxLikelyMethods, schema, countSchemaItems, pInstrumentationData, + ilOffset, false); +} + +//------------------------------------------------------------------------ +// getRandomGDV: find GDV profile data for an IL offset, and return +// one of the possible methods/classes at random // // Arguments: // schema - profile schema @@ -289,17 +331,48 @@ extern "C" DLLEXPORT UINT32 WINAPI getLikelyClasses(LikelyClassRecord* // Returns: // Randomly observed class, or nullptr. // -CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSchema* schema, - UINT32 countSchemaItems, - BYTE* pInstrumentationData, - int32_t ilOffset, - CLRRandom* random) +void Compiler::getRandomGDV(ICorJitInfo::PgoInstrumentationSchema* schema, + UINT32 countSchemaItems, + BYTE* pInstrumentationData, + int32_t ilOffset, + CLRRandom* random, + CORINFO_CLASS_HANDLE* classGuess, + CORINFO_METHOD_HANDLE* methodGuess) { + *classGuess = NO_CLASS_HANDLE; + *methodGuess = NO_METHOD_HANDLE; + if (schema == nullptr) { - return NO_CLASS_HANDLE; + return; } + // We can have multiple histograms for the same IL offset. Use reservoir + // sampling to pick an entry at random. 
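// Editorial sketch (not part of the diff) of the size-1 reservoir sampling
// used just below: the k-th distinct handle replaces the current pick with
// probability 1/k,
//
//     pick = none; seen = 0;
//     for each candidate handle h:
//         seen++;
//         if (seen == 1 || random->Next(seen) == 0)   // keep h with probability 1/seen
//             pick = h;
//
// so after processing n distinct handles each one is the final pick with
// probability 1/n, independent of how many times it was observed.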
+ int numElementsSeen = 0; + auto addElement = [random, classGuess, methodGuess, &numElementsSeen](intptr_t handle, bool isClass) { + if (ICorJitInfo::IsUnknownHandle(handle)) + { + return; + } + + numElementsSeen++; + bool replace = (numElementsSeen == 1) || (random->Next(numElementsSeen) == 0); + if (replace) + { + if (isClass) + { + *classGuess = (CORINFO_CLASS_HANDLE)handle; + *methodGuess = NO_METHOD_HANDLE; + } + else + { + *classGuess = NO_CLASS_HANDLE; + *methodGuess = (CORINFO_METHOD_HANDLE)handle; + } + } + }; + for (COUNT_T i = 0; i < countSchemaItems; i++) { if (schema[i].ILOffset != (int32_t)ilOffset) @@ -307,18 +380,13 @@ CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSch continue; } - if ((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) && + if (((schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass) || + (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod)) && (schema[i].Count == 1)) { INT_PTR result = *(INT_PTR*)(pInstrumentationData + schema[i].Offset); - if (ICorJitInfo::IsUnknownHandle(result)) - { - return NO_CLASS_HANDLE; - } - else - { - return (CORINFO_CLASS_HANDLE)result; - } + addElement(result, true); + continue; } bool isHistogramCount = @@ -326,30 +394,21 @@ CORINFO_CLASS_HANDLE Compiler::getRandomClass(ICorJitInfo::PgoInstrumentationSch (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramLongCount); if (isHistogramCount && (schema[i].Count == 1) && ((i + 1) < countSchemaItems) && - (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes)) + ((schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes) || + (schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods))) { - // Form a histogram + // Form a histogram. Note that even though we use reservoir + // sampling we want to weigh distinct handles equally, regardless + // of count. // - LikelyClassHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count); + LikelyClassMethodHistogram h((INT_PTR*)(pInstrumentationData + schema[i + 1].Offset), schema[i + 1].Count); - if (h.countHistogramElements == 0) + bool isClass = + schema[i + 1].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes; + for (UINT32 i = 0; i < h.countHistogramElements; i++) { - return NO_CLASS_HANDLE; + addElement(h.HistogramEntryAt(i).m_handle, isClass); } - - // Choose an entry at random. - // - unsigned randomEntryIndex = random->Next(0, h.countHistogramElements); - LikelyClassHistogramEntry randomEntry = h.HistogramEntryAt(randomEntryIndex); - - if (ICorJitInfo::IsUnknownHandle(randomEntry.m_mt)) - { - return NO_CLASS_HANDLE; - } - - return (CORINFO_CLASS_HANDLE)randomEntry.m_mt; } } - - return NO_CLASS_HANDLE; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 61230a0e8292c8..357aaafa6bd219 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -6709,9 +6709,11 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) (call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr, call->IsTailPrefixedCall(), tailCallResult, nullptr); - // Are we currently planning to expand the gtControlExpr as an early virtual call target? + // Do some profitability checks for whether we should expand a vtable call + // target early. 
Note that we may already have expanded it due to GDV at + // this point, so make sure we do not undo that work. // - if (call->IsExpandedEarly() && call->IsVirtualVtable()) + if (call->IsExpandedEarly() && call->IsVirtualVtable() && (call->gtControlExpr == nullptr)) { assert(call->gtArgs.HasThisPointer()); // It isn't alway profitable to expand a virtual call early @@ -8482,18 +8484,18 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call) // if (call->IsExpandedEarly() && call->IsVirtualVtable()) { - // We only expand the Vtable Call target once in the global morph phase - if (fgGlobalMorph) + // We expand the Vtable Call target either in the global morph phase or + // in guarded devirt if we need it for the guard. + if (fgGlobalMorph && (call->gtControlExpr == nullptr)) { - assert(call->gtControlExpr == nullptr); // We only call this method and assign gtControlExpr once call->gtControlExpr = fgExpandVirtualVtableCallTarget(call); } // We always have to morph or re-morph the control expr // call->gtControlExpr = fgMorphTree(call->gtControlExpr); - // Propagate any gtFlags into the call - call->gtFlags |= call->gtControlExpr->gtFlags; + // Propagate any side effect flags into the call + call->gtFlags |= call->gtControlExpr->gtFlags & GTF_ALL_EFFECT; } // Morph stelem.ref helper call to store a null value, into a store into an array without the helper. diff --git a/src/coreclr/jit/patchpoint.cpp b/src/coreclr/jit/patchpoint.cpp index d1478c51c31239..bf7560dd431a1f 100644 --- a/src/coreclr/jit/patchpoint.cpp +++ b/src/coreclr/jit/patchpoint.cpp @@ -78,7 +78,7 @@ class PatchpointTransformer // If we're instrumenting, we should not have decided to // put class probes here, as that is driven by looking at IL. // - assert((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0); + assert((block->bbFlags & BBF_HAS_HISTOGRAM_PROFILE) == 0); // Clear the partial comp flag. // diff --git a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs index 30d8af4efb23c7..d9671ef058ae13 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ModuleHeaders.cs @@ -15,7 +15,7 @@ internal struct ReadyToRunHeaderConstants public const uint Signature = 0x00525452; // 'RTR' public const ushort CurrentMajorVersion = 6; - public const ushort CurrentMinorVersion = 1; + public const ushort CurrentMinorVersion = 2; } #pragma warning disable 0169 diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index b6d7649864b994..3bb774ed11b500 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -286,9 +286,14 @@ which is the right helper to use to allocate an object of a given type. */ CORINFO_HELP_STACK_PROBE, // Probes each page of the allocated stack frame CORINFO_HELP_PATCHPOINT, // Notify runtime that code has reached a patchpoint + CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. + CORINFO_HELP_CLASSPROFILE32, // Update 32-bit class profile for a call site CORINFO_HELP_CLASSPROFILE64, // Update 64-bit class profile for a call site - CORINFO_HELP_PARTIAL_COMPILATION_PATCHPOINT, // Notify runtime that code has reached a part of the method that wasn't originally jitted. 
+ CORINFO_HELP_DELEGATEPROFILE32, // Update 32-bit method profile for a delegate call site + CORINFO_HELP_DELEGATEPROFILE64, // Update 64-bit method profile for a delegate call site + CORINFO_HELP_VTABLEPROFILE32, // Update 32-bit method profile for a vtable call site + CORINFO_HELP_VTABLEPROFILE64, // Update 64-bit method profile for a vtable call site CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs index 3c1eb891ac4f81..27dbbfb44b9f8e 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoImpl.cs @@ -101,20 +101,23 @@ public static IntPtr Get() private static readonly IntPtr s_jit; } - private struct LikelyClassRecord + private struct LikelyClassMethodRecord { - public IntPtr clsHandle; + public IntPtr handle; public uint likelihood; - public LikelyClassRecord(IntPtr clsHandle, uint likelihood) + public LikelyClassMethodRecord(IntPtr handle, uint likelihood) { - this.clsHandle = clsHandle; + this.handle = handle; this.likelihood = likelihood; } } [DllImport(JitLibrary)] - private extern static uint getLikelyClasses(LikelyClassRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset); + private extern static uint getLikelyClasses(LikelyClassMethodRecord* pLikelyClasses, uint maxLikelyClasses, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset); + + [DllImport(JitLibrary)] + private extern static uint getLikelyMethods(LikelyClassMethodRecord* pLikelyMethods, uint maxLikelyMethods, PgoInstrumentationSchema* schema, uint countSchemaItems, byte*pInstrumentationData, int ilOffset); [DllImport(JitSupportLibrary)] private extern static IntPtr GetJitHost(IntPtr configProvider); @@ -192,17 +195,18 @@ private Logger Logger public static IEnumerable ConvertTypeHandleHistogramsToCompactTypeHistogramFormat(PgoSchemaElem[] pgoData, CompilationModuleGroup compilationModuleGroup) { - bool hasTypeHistogram = false; + bool hasHistogram = false; foreach (var elem in pgoData) { - if (elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes) + if (elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes || + elem.InstrumentationKind == PgoInstrumentationKind.HandleHistogramMethods) { // found histogram - hasTypeHistogram = true; + hasHistogram = true; break; } } - if (!hasTypeHistogram) + if (!hasHistogram) { foreach (var elem in pgoData) { @@ -222,9 +226,10 @@ public static IEnumerable ConvertTypeHandleHistogramsToCompactTyp if ((i + 1 < pgoData.Length) && (pgoData[i].InstrumentationKind == PgoInstrumentationKind.HandleHistogramIntCount || pgoData[i].InstrumentationKind == PgoInstrumentationKind.HandleHistogramLongCount) && - (pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes)) + (pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes || + pgoData[i + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramMethods)) { - PgoSchemaElem? newElem = ComputeLikelyClass(i, handleToObject, nativeSchema, instrumentationData, compilationModuleGroup); + PgoSchemaElem? 
                    if (newElem.HasValue)
                    {
                        yield return newElem.Value;
@@ -249,33 +254,63 @@
            IntPtr LocalObjectToHandle(object input)
            }
        }

-       private static PgoSchemaElem? ComputeLikelyClass(int index, Dictionary<IntPtr, object> handleToObject, PgoInstrumentationSchema[] nativeSchema, byte[] instrumentationData, CompilationModuleGroup compilationModuleGroup)
+       private static PgoSchemaElem? ComputeLikelyClassMethod(int index, Dictionary<IntPtr, object> handleToObject, PgoInstrumentationSchema[] nativeSchema, byte[] instrumentationData, CompilationModuleGroup compilationModuleGroup)
        {
            // getLikelyClasses will use two entries from the native schema table. There must be at least two present to avoid overruning the buffer
            if (index > (nativeSchema.Length - 2))
                return null;

+           bool isType = nativeSchema[index + 1].InstrumentationKind == PgoInstrumentationKind.HandleHistogramTypes;
+
            fixed(PgoInstrumentationSchema* pSchema = &nativeSchema[index])
            {
                fixed(byte* pInstrumentationData = &instrumentationData[0])
                {
-                   // We're going to store only the most popular type to reduce size of the profile
-                   LikelyClassRecord* likelyClasses = stackalloc LikelyClassRecord[1];
-                   uint numberOfClasses = getLikelyClasses(likelyClasses, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                   // We're going to store only the most popular type/method to reduce size of the profile
+                   LikelyClassMethodRecord* likelyClassMethods = stackalloc LikelyClassMethodRecord[1];
+                   uint numberOfRecords;
+                   if (isType)
+                   {
+                       numberOfRecords = getLikelyClasses(likelyClassMethods, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                   }
+                   else
+                   {
+                       numberOfRecords = getLikelyMethods(likelyClassMethods, 1, pSchema, 2, pInstrumentationData, nativeSchema[index].ILOffset);
+                   }

-                   if (numberOfClasses > 0)
+                   if (numberOfRecords > 0)
                    {
-                       TypeDesc type = (TypeDesc)handleToObject[likelyClasses->clsHandle];
+                       TypeSystemEntityOrUnknown[] newData = null;
+                       if (isType)
+                       {
+                           TypeDesc type = (TypeDesc)handleToObject[likelyClassMethods->handle];
+#if READYTORUN
+                           if (compilationModuleGroup.VersionsWithType(type))
+#endif
+                           {
+                               newData = new[] { new TypeSystemEntityOrUnknown(type) };
+                           }
+                       }
+                       else
+                       {
+                           MethodDesc method = (MethodDesc)handleToObject[likelyClassMethods->handle];
+
#if READYTORUN
-                       if (compilationModuleGroup.VersionsWithType(type))
+                           if (compilationModuleGroup.VersionsWithMethodBody(method))
#endif
+                           {
+                               newData = new[] { new TypeSystemEntityOrUnknown(method) };
+                           }
+                       }
+
+                       if (newData != null)
                        {
                            PgoSchemaElem likelyClassElem = new PgoSchemaElem();
-                           likelyClassElem.InstrumentationKind = PgoInstrumentationKind.GetLikelyClass;
+                           likelyClassElem.InstrumentationKind = isType ? PgoInstrumentationKind.GetLikelyClass : PgoInstrumentationKind.GetLikelyMethod;
                            likelyClassElem.ILOffset = nativeSchema[index].ILOffset;
                            likelyClassElem.Count = 1;
-                           likelyClassElem.Other = (int)(likelyClasses->likelihood | (numberOfClasses << 8));
-                           likelyClassElem.DataObject = new TypeSystemEntityOrUnknown[] { new TypeSystemEntityOrUnknown(type) };
+                           likelyClassElem.Other = (int)(likelyClassMethods->likelihood | (numberOfRecords << 8));
+                           likelyClassElem.DataObject = newData;
                            return likelyClassElem;
                        }
                    }
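
The compact record built above packs two values into the schema element's Other field: the likelihood of the single most popular handle in the low 8 bits, and the number of records getLikelyClasses/getLikelyMethods reported above bit 8. A minimal C# sketch of the inverse decoding, for illustration only (the helper name is made up and not part of the patch):

    // Unpack the Other field written by ComputeLikelyClassMethod:
    // low 8 bits = likelihood of the stored handle, upper bits = record count.
    static (uint likelihood, uint numberOfRecords) DecodeLikelyOther(int other)
    {
        uint likelihood = (uint)other & 0xFF;
        uint numberOfRecords = (uint)other >> 8;
        return (likelihood, numberOfRecords);
    }
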
diff --git a/src/coreclr/tools/Common/Pgo/PgoFormat.cs b/src/coreclr/tools/Common/Pgo/PgoFormat.cs
index a4e23286be4c67..5dc847b34810b0 100644
--- a/src/coreclr/tools/Common/Pgo/PgoFormat.cs
+++ b/src/coreclr/tools/Common/Pgo/PgoFormat.cs
@@ -49,6 +49,7 @@ public enum PgoInstrumentationKind
        EdgeIntCount = (DescriptorMin * 6) | FourByte, // edge counter using unsigned 4 byte int
        EdgeLongCount = (DescriptorMin * 6) | EightByte, // edge counter using unsigned 8 byte int
        GetLikelyClass = (DescriptorMin * 7) | TypeHandle, // Compressed get likely class data
+       GetLikelyMethod = (DescriptorMin * 7) | MethodHandle, // Compressed get likely method data
    }

    public interface IPgoSchemaDataLoader
diff --git a/src/coreclr/tools/superpmi/mcs/verbdumpmap.cpp b/src/coreclr/tools/superpmi/mcs/verbdumpmap.cpp
index 1cfbc598dbee16..2ff9b3cbe36908 100644
--- a/src/coreclr/tools/superpmi/mcs/verbdumpmap.cpp
+++ b/src/coreclr/tools/superpmi/mcs/verbdumpmap.cpp
@@ -93,8 +93,9 @@ void DumpMap(int index, MethodContext* mc)
    bool hasClassProfile = false;
    bool hasMethodProfile = false;
    bool hasLikelyClass = false;
+   bool hasLikelyMethod = false;
    ICorJitInfo::PgoSource pgoSource = ICorJitInfo::PgoSource::Unknown;
-   if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, pgoSource))
+   if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, hasLikelyMethod, pgoSource))
    {
        rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_PGO);
@@ -118,6 +119,11 @@ void DumpMap(int index, MethodContext* mc)
            rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
        }

+       if (hasLikelyMethod)
+       {
+           rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
+       }
+
        if (pgoSource == ICorJitInfo::PgoSource::Static)
        {
            rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
diff --git a/src/coreclr/tools/superpmi/mcs/verbjitflags.cpp b/src/coreclr/tools/superpmi/mcs/verbjitflags.cpp
index a3e31a1f73062f..33190d3fbd9960 100644
--- a/src/coreclr/tools/superpmi/mcs/verbjitflags.cpp
+++ b/src/coreclr/tools/superpmi/mcs/verbjitflags.cpp
@@ -31,8 +31,9 @@ int verbJitFlags::DoWork(const char* nameOfInput)
    bool hasClassProfile = false;
    bool hasMethodProfile = false;
    bool hasLikelyClass = false;
+   bool hasLikelyMethod = false;
    ICorJitInfo::PgoSource pgoSource = ICorJitInfo::PgoSource::Unknown;
-   if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, pgoSource))
+   if (mc->hasPgoData(hasEdgeProfile, hasClassProfile, hasMethodProfile, hasLikelyClass, hasLikelyMethod, pgoSource))
    {
        rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_PGO);
@@ -56,6 +57,11 @@ int verbJitFlags::DoWork(const char* nameOfInput)
            rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
        }

+       if (hasLikelyMethod)
+       {
+           rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
+       }
+
        if (pgoSource == ICorJitInfo::PgoSource::Static)
        {
            rawFlags |= 1ULL << (EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
index 97be9fbfc9fcac..df50466b476d31 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.cpp
@@ -5607,9 +5607,10 @@ void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnosti
            }
            break;
        case ICorJitInfo::PgoInstrumentationKind::GetLikelyClass:
+       case ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod:
            {
-               // (N)umber, (L)ikelihood, (C)lass
-               printf("N %u L %u C %016llX", (unsigned)(pBuf[i].Other >> 8), (unsigned)(pBuf[i].Other && 0xFF), CastHandle(*(uintptr_t*)(pInstrumentationData + pBuf[i].Offset)));
+               // (N)umber, (L)ikelihood, (H)andle
+               printf("N %u L %u H %016llX", (unsigned)(pBuf[i].Other >> 8), (unsigned)(pBuf[i].Other && 0xFF), CastHandle(*(uintptr_t*)(pInstrumentationData + pBuf[i].Offset)));
            }
            break;
        default:
@@ -7072,12 +7073,13 @@ int MethodContext::dumpMD5HashToBuffer(BYTE* pBuffer, int bufLen, char* hash, in
    return m_hash.HashBuffer(pBuffer, bufLen, hash, hashLen);
}

-bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, ICorJitInfo::PgoSource& pgoSource)
+bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, bool& hasLikelyMethod, ICorJitInfo::PgoSource& pgoSource)
{
    hasEdgeProfile = false;
    hasClassProfile = false;
    hasMethodProfile = false;
    hasLikelyClass = false;
+   hasLikelyMethod = false;

    // Obtain the Method Info structure for this method
    CORINFO_METHOD_INFO info;
@@ -7102,8 +7104,9 @@ bool MethodContext::hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool
            hasClassProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramTypes);
            hasMethodProfile |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::HandleHistogramMethods);
            hasLikelyClass |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyClass);
+           hasLikelyMethod |= (schema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::GetLikelyMethod);

-           if (hasEdgeProfile && hasClassProfile && hasLikelyClass)
+           if (hasEdgeProfile && hasClassProfile && hasLikelyClass && hasLikelyMethod)
            {
                break;
            }
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
index 816682b1effe9e..5009d6b6a69c40 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
+++ b/src/coreclr/tools/superpmi/superpmi-shared/methodcontext.h
@@ -54,6 +54,7 @@ enum EXTRA_JIT_FLAGS
    HAS_STATIC_PROFILE = 59,
    HAS_DYNAMIC_PROFILE = 58,
    HAS_METHOD_PROFILE = 57,
+   HAS_LIKELY_METHOD = 56,
};

// Asserts to catch changes in corjit flags definitions.
@@ -64,6 +65,8 @@ static_assert((int)EXTRA_JIT_FLAGS::HAS_CLASS_PROFILE == (int)CORJIT_FLAGS::CorJ
static_assert((int)EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED33, "Jit Flags Mismatch");
static_assert((int)EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED32, "Jit Flags Mismatch");
static_assert((int)EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED31, "Jit Flags Mismatch");
+static_assert((int)EXTRA_JIT_FLAGS::HAS_METHOD_PROFILE == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED30, "Jit Flags Mismatch");
+static_assert((int)EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD == (int)CORJIT_FLAGS::CorJitFlag::CORJIT_FLAG_UNUSED29, "Jit Flags Mismatch");

class MethodContext
{
@@ -106,7 +109,7 @@ class MethodContext
    int dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0);
    int dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0);

-   bool hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, ICorJitInfo::PgoSource& pgoSource);
+   bool hasPgoData(bool& hasEdgeProfile, bool& hasClassProfile, bool& hasMethodProfile, bool& hasLikelyClass, bool& hasLikelyMethod, ICorJitInfo::PgoSource& pgoSource);

    void recGlobalContext(const MethodContext& other);
diff --git a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
index 1d1d4d53b1a845..b51a54ba183851 100644
--- a/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
+++ b/src/coreclr/tools/superpmi/superpmi-shared/spmidumphelper.cpp
@@ -285,6 +285,7 @@ std::string SpmiDumpHelper::DumpJitFlags(unsigned long long flags)
    AddFlagNumeric(HAS_CLASS_PROFILE, EXTRA_JIT_FLAGS::HAS_CLASS_PROFILE);
    AddFlagNumeric(HAS_METHOD_PROFILE, EXTRA_JIT_FLAGS::HAS_METHOD_PROFILE);
    AddFlagNumeric(HAS_LIKELY_CLASS, EXTRA_JIT_FLAGS::HAS_LIKELY_CLASS);
+   AddFlagNumeric(HAS_LIKELY_METHOD, EXTRA_JIT_FLAGS::HAS_LIKELY_METHOD);
    AddFlagNumeric(HAS_STATIC_PROFILE, EXTRA_JIT_FLAGS::HAS_STATIC_PROFILE);
    AddFlagNumeric(HAS_DYNAMIC_PROFILE, EXTRA_JIT_FLAGS::HAS_DYNAMIC_PROFILE);
diff --git a/src/coreclr/vm/jithelpers.cpp b/src/coreclr/vm/jithelpers.cpp
index 5dfb7de70ba29e..96819036fe63df 100644
--- a/src/coreclr/vm/jithelpers.cpp
+++ b/src/coreclr/vm/jithelpers.cpp
@@ -5328,7 +5328,7 @@ void JIT_PartialCompilationPatchpoint(int* counter, int ilOffset)

#endif // FEATURE_ON_STACK_REPLACEMENT

-static unsigned ClassProfileRand()
+static unsigned HandleHistogramProfileRand()
{
    // generate a random number (xorshift32)
    //
@@ -5345,7 +5345,43 @@ static unsigned ClassProfileRand()
    return x;
}

-HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
+template<typename T>
+static int CheckSample(T index)
+{
+    const unsigned S = ICorJitInfo::HandleHistogram32::SIZE;
+    const unsigned N = ICorJitInfo::HandleHistogram32::SAMPLE_INTERVAL;
+    static_assert_no_msg(N >= S);
+    static_assert_no_msg((std::is_same<T, unsigned>::value || std::is_same<T, uint64_t>::value));
+
+    // If table is not yet full, just add entries in.
+    //
+    if (index < S)
+    {
+        return static_cast<int>(index);
+    }
+
+    unsigned x = HandleHistogramProfileRand();
+    // N is the sampling window size,
+    // it should be larger than the table size.
+    //
+    // If we let N == count then we are building an entire
+    // run sample -- probability of update decreases over time.
+    // Would be a good strategy for an AOT profiler.
+    //
+    // But for TieredPGO we would prefer something that is more
+    // weighted to recent observations.
+    //
+    // For S=4, N=128, we'll sample (on average) every 32nd call.
+    //
+    if ((x % N) >= S)
+    {
+        return -1;
+    }
+
+    return static_cast<int>(x % S);
+}
+
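
A minimal C# sketch of the sampling policy CheckSample implements above, for illustration only (the method name and the Random stand-in for the xorshift32 generator are assumptions, not part of the patch). With the comment's example numbers, S = 4 table slots and an N = 128 sampling window, a call past the warm-up phase updates the table with probability S/N = 1/32 and overwrites a pseudo-randomly chosen slot:

    static int CheckSampleSketch(ulong callIndex, uint s, uint n, System.Random rng)
    {
        if (callIndex < s)
        {
            return (int)callIndex;      // warm-up: fill the table in call order
        }

        uint x = (uint)rng.Next();      // stand-in for HandleHistogramProfileRand()
        if ((x % n) >= s)
        {
            return -1;                  // skip this call; most calls take this path
        }

        return (int)(x % s);            // overwrite a pseudo-random slot
    }
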
+HCIMPL2(void, JIT_ClassProfile32, Object *obj, ICorJitInfo::HandleHistogram32* classProfile)
{
    FCALL_CONTRACT;
    FC_GC_POLL_NOT_NEEDED();
@@ -5353,12 +5389,14 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
    OBJECTREF objRef = ObjectToOBJECTREF(obj);
    VALIDATEOBJECTREF(objRef);

-   ICorJitInfo::ClassProfile32* const classProfile = (ICorJitInfo::ClassProfile32*) tableAddress;
    volatile unsigned* pCount = (volatile unsigned*) &classProfile->Count;
-   const unsigned count = (*pCount)++;
-   const unsigned S = ICorJitInfo::ClassProfile32::SIZE;
-   const unsigned N = ICorJitInfo::ClassProfile32::SAMPLE_INTERVAL;
-   _ASSERTE(N >= S);
+   const unsigned callIndex = (*pCount)++;
+
+   int sampleIndex = CheckSample(callIndex);
+   if (sampleIndex == -1)
+   {
+       return;
+   }

    if (objRef == NULL)
    {
@@ -5373,7 +5411,7 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
    //
    if (pMT->GetLoaderAllocator()->IsCollectible())
    {
-       pMT = (MethodTable*)DEFAULT_UNKNOWN_TYPEHANDLE;
+       pMT = (MethodTable*)DEFAULT_UNKNOWN_HANDLE;
    }

#ifdef _DEBUG
@@ -5381,39 +5419,159 @@ HCIMPL2(void, JIT_ClassProfile32, Object *obj, void* tableAddress)
    PgoManager::VerifyAddress(classProfile + 1);
#endif

+   classProfile->HandleTable[sampleIndex] = (CORINFO_CLASS_HANDLE)pMT;
+}
+HCIMPLEND
+
+// Version of helper above used when the count is 64-bit
+HCIMPL2(void, JIT_ClassProfile64, Object *obj, ICorJitInfo::HandleHistogram64* classProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pCount = (volatile uint64_t*) &classProfile->Count;
+    const uint64_t callIndex = (*pCount)++;
+
+    int sampleIndex = CheckSample(callIndex);
+    if (sampleIndex == -1)
+    {
+        return;
+    }
+
+    if (objRef == NULL)
+    {
+        return;
+    }
+
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    if (pMT->GetLoaderAllocator()->IsCollectible())
+    {
+        pMT = (MethodTable*)DEFAULT_UNKNOWN_HANDLE;
+    }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(classProfile);
+    PgoManager::VerifyAddress(classProfile + 1);
+#endif
+
+    classProfile->HandleTable[sampleIndex] = (CORINFO_CLASS_HANDLE)pMT;
+}
+HCIMPLEND
+
+HCIMPL2(void, JIT_DelegateProfile32, Object *obj, ICorJitInfo::HandleHistogram32* methodProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile unsigned* pMethodCount = (volatile unsigned*) &methodProfile->Count;
+    const unsigned methodCallIndex = (*pMethodCount)++;
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1)
+    {
+        return;
+    }
+
+    if (objRef == NULL)
+    {
+        return;
+    }
+
+    MethodTable* pMT = objRef->GetMethodTable();
+
+    _ASSERTE(pMT->IsDelegate());
+
+    // Resolve method. We handle only the common "direct" delegate as that is
+    // in any case the only one we can reasonably do GDV for. For instance,
+    // open delegates are filtered out here, and many cases with inner
+    // "complicated" logic as well (e.g. static functions, multicast, unmanaged
+    // functions).
+    //
+    MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE;
+    DELEGATEREF del = (DELEGATEREF)objRef;
+    if ((del->GetInvocationCount() == 0) && (del->GetMethodPtrAux() == NULL))
+    {
+        MethodDesc* pMD = NonVirtualEntry2MethodDesc(del->GetMethodPtr());
+        if ((pMD != nullptr) && !pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod())
+        {
+            pRecordedMD = pMD;
+        }
+    }
+
+#ifdef _DEBUG
+    PgoManager::VerifyAddress(methodProfile);
+    PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
    // If table is not yet full, just add entries in.
    //
-   if (count < S)
+   methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD;
+}
+HCIMPLEND
+
+// Version of helper above used when the count is 64-bit
+HCIMPL3(void, JIT_DelegateProfile64, Object *obj, CORINFO_METHOD_HANDLE baseMethod, ICorJitInfo::HandleHistogram64* methodProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pMethodCount = (volatile uint64_t*) &methodProfile->Count;
+    const uint64_t methodCallIndex = (*pMethodCount)++;
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1)
    {
-       classProfile->ClassTable[count] = (CORINFO_CLASS_HANDLE)pMT;
+       return;
    }
-   else
+
+   if (objRef == NULL)
    {
-       unsigned x = ClassProfileRand();
+       return;
+   }

-       // N is the sampling window size,
-       // it should be larger than the table size.
-       //
-       // If we let N == count then we are building an entire
-       // run sample -- probability of update decreases over time.
-       // Would be a good strategy for an AOT profiler.
-       //
-       // But for TieredPGO we would prefer something that is more
-       // weighted to recent observations.
-       //
-       // For S=4, N=128, we'll sample (on average) every 32nd call.
-       //
-       if ((x % N) < S)
+   MethodTable* pMT = objRef->GetMethodTable();
+
+   _ASSERTE(pMT->IsDelegate());
+
+   // Resolve method. We handle only the common "direct" delegate as that is
+   // in any case the only one we can reasonably do GDV for. For instance,
+   // open delegates are filtered out here, and many cases with inner
+   // "complicated" logic as well (e.g. static functions, multicast, unmanaged
+   // functions).
+   //
+   MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE;
+   DELEGATEREF del = (DELEGATEREF)objRef;
+   if ((del->GetInvocationCount() == 0) && (del->GetMethodPtrAux() == NULL))
+   {
+       MethodDesc* pMD = NonVirtualEntry2MethodDesc(del->GetMethodPtr());
+       if ((pMD != nullptr) && !pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod())
        {
-           unsigned i = x % S;
-           classProfile->ClassTable[i] = (CORINFO_CLASS_HANDLE)pMT;
+           pRecordedMD = pMD;
        }
    }
+
+#ifdef _DEBUG
+   PgoManager::VerifyAddress(methodProfile);
+   PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
+   // If table is not yet full, just add entries in.
+   //
+   methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD;
}
HCIMPLEND
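
The two delegate helpers only record a concrete target for the common "direct" case described in the comment above; everything else falls back to the unknown-handle sentinel. A hedged C# illustration (the class and members are made up, not part of the patch) of which delegate shapes would record a real method:

    class DelegateProbeExamples
    {
        void Instance() { }
        static void Static() { }

        void Show()
        {
            System.Action direct = this.Instance;   // "direct" instance delegate: target method recorded
            System.Action viaStatic = Static;       // static target: filtered out, recorded as unknown
            System.Action multi = this.Instance;
            multi += this.Instance;                 // multicast: filtered out, recorded as unknown
        }
    }
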
-// Version of helper above used when the count is 64-bit
-HCIMPL2(void, JIT_ClassProfile64, Object *obj, void* tableAddress)
+HCIMPL3(void, JIT_VTableProfile32, Object* obj, CORINFO_METHOD_HANDLE baseMethod, ICorJitInfo::HandleHistogram32* methodProfile)
{
    FCALL_CONTRACT;
    FC_GC_POLL_NOT_NEEDED();
@@ -5421,44 +5579,109 @@ HCIMPL2(void, JIT_ClassProfile64, Object *obj, void* tableAddress)
    OBJECTREF objRef = ObjectToOBJECTREF(obj);
    VALIDATEOBJECTREF(objRef);

-   ICorJitInfo::ClassProfile64* const classProfile = (ICorJitInfo::ClassProfile64*) tableAddress;
-   volatile uint64_t* pCount = (volatile uint64_t*) &classProfile->Count;
-   const uint64_t count = (*pCount)++;
-   const unsigned S = ICorJitInfo::ClassProfile32::SIZE;
-   const unsigned N = ICorJitInfo::ClassProfile32::SAMPLE_INTERVAL;
-   _ASSERTE(N >= S);
+   volatile unsigned* pMethodCount = (volatile unsigned*) &methodProfile->Count;
+   const unsigned methodCallIndex = (*pMethodCount)++;
+   int methodSampleIndex = CheckSample(methodCallIndex);
+
+   if (methodSampleIndex == -1)
+   {
+       return;
+   }

    if (objRef == NULL)
    {
        return;
    }

+   MethodDesc* pBaseMD = GetMethod(baseMethod);
+
+   // Method better be virtual
+   _ASSERTE(pBaseMD->IsVirtual());
+
+   // We do not expect to see interface methods here as we cannot efficiently
+   // use method handle information for these anyway.
+   _ASSERTE(!pBaseMD->IsInterface());
+
+   // Shouldn't be doing this for instantiated methods as they live elsewhere
+   _ASSERTE(!pBaseMD->HasMethodInstantiation());
+
    MethodTable* pMT = objRef->GetMethodTable();

-   if (pMT->GetLoaderAllocator()->IsCollectible())
+   // Resolve method
+   WORD slot = pBaseMD->GetSlot();
+   _ASSERTE(slot < pBaseMD->GetMethodTable()->GetNumVirtuals());
+
+   MethodDesc* pMD = pMT->GetMethodDescForSlot(slot);
+
+   MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE;
+   if (!pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod())
    {
-       pMT = (MethodTable*)DEFAULT_UNKNOWN_TYPEHANDLE;
+       pRecordedMD = pMD;
    }

#ifdef _DEBUG
-   PgoManager::VerifyAddress(classProfile);
-   PgoManager::VerifyAddress(classProfile + 1);
+   PgoManager::VerifyAddress(methodProfile);
+   PgoManager::VerifyAddress(methodProfile + 1);
#endif

-   if (count < S)
+   methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD;
+}
+HCIMPLEND
+
+HCIMPL3(void, JIT_VTableProfile64, Object* obj, CORINFO_METHOD_HANDLE baseMethod, ICorJitInfo::HandleHistogram64* methodProfile)
+{
+    FCALL_CONTRACT;
+    FC_GC_POLL_NOT_NEEDED();
+
+    OBJECTREF objRef = ObjectToOBJECTREF(obj);
+    VALIDATEOBJECTREF(objRef);
+
+    volatile uint64_t* pMethodCount = (volatile uint64_t*) &methodProfile->Count;
+    const uint64_t methodCallIndex = (*pMethodCount)++;
+    int methodSampleIndex = CheckSample(methodCallIndex);
+
+    if (methodSampleIndex == -1)
    {
-       classProfile->ClassTable[count] = (CORINFO_CLASS_HANDLE)pMT;
+       return;
    }
-   else
+
+   if (objRef == NULL)
    {
-       unsigned x = ClassProfileRand();
+       return;
+   }

-       if ((x % N) < S)
-       {
-           unsigned i = x % S;
-           classProfile->ClassTable[i] = (CORINFO_CLASS_HANDLE)pMT;
-       }
+   MethodDesc* pBaseMD = GetMethod(baseMethod);
+
+   // Method better be virtual
+   _ASSERTE(pBaseMD->IsVirtual());
+
+   // We do not expect to see interface methods here as we cannot efficiently
+   // use method handle information for these anyway.
+   _ASSERTE(!pBaseMD->IsInterface());
+
+   // Shouldn't be doing this for instantiated methods as they live elsewhere
+   _ASSERTE(!pBaseMD->HasMethodInstantiation());
+
+   MethodTable* pMT = objRef->GetMethodTable();
+
+   // Resolve method
+   WORD slot = pBaseMD->GetSlot();
+   _ASSERTE(slot < pBaseMD->GetMethodTable()->GetNumVirtuals());
+
+   MethodDesc* pMD = pMT->GetMethodDescForSlot(slot);
+
+   MethodDesc* pRecordedMD = (MethodDesc*)DEFAULT_UNKNOWN_HANDLE;
+   if (!pMD->GetLoaderAllocator()->IsCollectible() && !pMD->IsDynamicMethod())
+   {
+       pRecordedMD = pMD;
    }
+
+#ifdef _DEBUG
+   PgoManager::VerifyAddress(methodProfile);
+   PgoManager::VerifyAddress(methodProfile + 1);
+#endif
+
+   methodProfile->HandleTable[methodSampleIndex] = (CORINFO_METHOD_HANDLE)pRecordedMD;
}
HCIMPLEND
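
In effect the vtable probes record, for a call site whose declared target is a virtual (non-interface, non-generic) base method, the override that the receiver's method table resolves to via the base method's slot, i.e. the same method a virtual dispatch would invoke. A tiny C# illustration (the types are made up, not part of the patch):

    class Base { public virtual void M() { } }
    class Derived : Base { public override void M() { } }

    static void CallSite(Base b)
    {
        // With vtable profiling enabled, a Derived receiver records Derived.M
        // and a Base receiver records Base.M for this call site.
        b.M();
    }
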
diff --git a/src/coreclr/vm/method.cpp b/src/coreclr/vm/method.cpp
index 8b7f075c1b50b7..ee5d1f21837a69 100644
--- a/src/coreclr/vm/method.cpp
+++ b/src/coreclr/vm/method.cpp
@@ -2117,6 +2117,13 @@ MethodDesc* NonVirtualEntry2MethodDesc(PCODE entryPoint)
        return (MethodDesc*)((FixupPrecode*)pInstr)->GetMethodDesc();
    }

+   // Is it an FCALL?
+   MethodDesc* pFCallMD = ECall::MapTargetBackToMethod(entryPoint);
+   if (pFCallMD != NULL)
+   {
+       return pFCallMD;
+   }
+
    return NULL;
}

@@ -2153,11 +2160,6 @@ MethodDesc* Entry2MethodDesc(PCODE entryPoint, MethodTable *pMT)
    if (pMD != NULL)
        RETURN(pMD);

-   // Is it an FCALL?
-   pMD = ECall::MapTargetBackToMethod(entryPoint);
-   if (pMD != NULL)
-       RETURN(pMD);
-
    // We should never get here
    _ASSERTE(!"Entry2MethodDesc failed");
    RETURN (NULL);
}
diff --git a/src/coreclr/vm/pgo.cpp b/src/coreclr/vm/pgo.cpp
index 7f7115d1f3c560..a4bad1f5c6dffa 100644
--- a/src/coreclr/vm/pgo.cpp
+++ b/src/coreclr/vm/pgo.cpp
@@ -281,11 +281,11 @@ void PgoManager::WritePgoData()
                MethodDesc* md = reinterpret_cast<MethodDesc*>(methodHandleData);
                if (md == nullptr)
                {
-                   fprintf(pgoDataFile, "MethodHandle: NULL");
+                   fprintf(pgoDataFile, "MethodHandle: NULL\n");
                }
                else if (ICorJitInfo::IsUnknownHandle(methodHandleData))
                {
-                   fprintf(pgoDataFile, "MethodHandle: UNKNOWN");
+                   fprintf(pgoDataFile, "MethodHandle: UNKNOWN\n");
                }
                else
                {
@@ -297,13 +297,13 @@ void PgoManager::WritePgoData()
                    // MethodName|@|fully_qualified_type_name
                    if (tTypeName.GetCount() + 1 + tMethodName.GetCount() > 8192)
                    {
-                       fprintf(pgoDataFile, "MethodHandle: UNKNOWN");
+                       fprintf(pgoDataFile, "MethodHandle: UNKNOWN\n");
                    }
                    else
                    {
                        StackScratchBuffer methodNameBuffer;
                        StackScratchBuffer typeBuffer;
-                       fprintf(pgoDataFile, "MethodHandle: %s|@|%s", tMethodName.GetUTF8(methodNameBuffer), tTypeName.GetUTF8(typeBuffer));
+                       fprintf(pgoDataFile, "MethodHandle: %s|@|%s\n", tMethodName.GetUTF8(methodNameBuffer), tTypeName.GetUTF8(typeBuffer));
                    }
                }
                break;
diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj
index 4cc62162bc2f2a..f60eb9b8394ac7 100644
--- a/src/tests/Common/testenvironment.proj
+++ b/src/tests/Common/testenvironment.proj
@@ -60,6 +60,8 @@
        COMPlus_JitObjectStackAllocation;
        COMPlus_JitInlinePolicyProfile;
        COMPlus_JitClassProfiling;
+       COMPlus_JitDelegateProfiling;
+       COMPlus_JitVTableProfiling;
        COMPlus_JitEdgeProfiling;
        COMPlus_JitRandomGuardedDevirtualization;
        COMPlus_JitRandomEdgeCounts;
@@ -187,7 +189,6 @@
-
@@ -207,7 +208,9 @@
+
+
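
The final hunks add COMPlus_JitDelegateProfiling and COMPlus_JitVTableProfiling to the environment variables the test scenarios can set. A hedged C# sketch of enabling the same switches for a child process; the host command line and the value "1" are assumptions, not taken from the patch:

    var psi = new System.Diagnostics.ProcessStartInfo("corerun", "app.dll");
    psi.Environment["COMPlus_JitClassProfiling"] = "1";
    psi.Environment["COMPlus_JitDelegateProfiling"] = "1";
    psi.Environment["COMPlus_JitVTableProfiling"] = "1";
    System.Diagnostics.Process.Start(psi);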