From 1797d9cbeba539c3d8a2ba450ae4ce2fd1eef550 Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 15 Apr 2024 01:09:39 -0400 Subject: [PATCH] =?UTF-8?q?=EF=BB=BFAdded=20`AggressiveOptimization`=20to?= =?UTF-8?q?=20methods=20involved=20in=20measuring=20allocations.=20Warm=20?= =?UTF-8?q?up=20allocation=20measurement=20before=20taking=20actual=20meas?= =?UTF-8?q?urement.=20Isolated=20allocation=20measurement.=20Changed=20som?= =?UTF-8?q?e=20`RuntimeInformation`=20properties=20to=20static=20readonly?= =?UTF-8?q?=20fields.=20Removed=20enable=20monitoring=20in=20Engine=20(GcS?= =?UTF-8?q?tats=20handles=20it).=20Removed=20`GC.Collect()`=20from=20alloc?= =?UTF-8?q?ation=20measurement.=20Sleep=20thread=20to=20account=20for=20ti?= =?UTF-8?q?ered=20jit=20in=20Core=20runtimes=203.0=20to=206.0.=20Updated?= =?UTF-8?q?=20MemoryDiagnoserTests.=20Block=20finalizer=20thread=20during?= =?UTF-8?q?=20memory=20tests.=20Disabled=20EventSource=20for=20integration?= =?UTF-8?q?=20tests.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/BenchmarkDotNet/Engines/Engine.cs | 110 +++++++++++--- src/BenchmarkDotNet/Engines/GcStats.cs | 22 ++- .../Portability/RuntimeInformation.cs | 57 +++++--- .../BenchmarkDotNet.IntegrationTests.csproj | 2 + .../MemoryDiagnoserTests.cs | 135 ++++++++++++------ 5 files changed, 229 insertions(+), 97 deletions(-) diff --git a/src/BenchmarkDotNet/Engines/Engine.cs b/src/BenchmarkDotNet/Engines/Engine.cs index 9dc8cc616e..9c90aae80f 100644 --- a/src/BenchmarkDotNet/Engines/Engine.cs +++ b/src/BenchmarkDotNet/Engines/Engine.cs @@ -3,6 +3,7 @@ using System.Globalization; using System.Linq; using System.Runtime.CompilerServices; +using System.Threading; using BenchmarkDotNet.Characteristics; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Portability; @@ -214,31 +215,56 @@ private ClockSpan Measure(Action action, long invokeCount) private (GcStats, ThreadingStats, double) GetExtraStats(IterationData data) { - // we enable monitoring after main target run, for this single iteration which is executed at the end - // so even if we enable AppDomain monitoring in separate process - // it does not matter, because we have already obtained the results! - EnableMonitoring(); + // Warm up the measurement functions before starting the actual measurement. + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadInitial()); + DeadCodeEliminationHelper.KeepAliveWithoutBoxing(GcStats.ReadFinal()); IterationSetupAction(); // we run iteration setup first, so even if it allocates, it is not included in the results var initialThreadingStats = ThreadingStats.ReadInitial(); // this method might allocate var exceptionsStats = new ExceptionsStats(); // allocates exceptionsStats.StartListening(); // this method might allocate - var initialGcStats = GcStats.ReadInitial(); - WorkloadAction(data.InvokeCount / data.UnrollFactor); +#if !NET7_0_OR_GREATER + if (RuntimeInformation.IsNetCore && Environment.Version.Major is >= 3 and <= 6 && RuntimeInformation.IsTieredJitEnabled) + { + // #1542 + // We put the current thread to sleep so tiered jit can kick in, compile its stuff, + // and NOT allocate anything on the background thread when we are measuring allocations. + // This is only an issue on netcoreapp3.0 to net6.0. Tiered jit allocations were "fixed" in net7.0 + // (maybe not completely eliminated forever, but at least reduced to a point where measurements are much more stable), + // and netcoreapp2.X uses only GetAllocatedBytesForCurrentThread which doesn't capture the tiered jit allocations. + Thread.Sleep(TimeSpan.FromMilliseconds(500)); + } +#endif - exceptionsStats.Stop(); - var finalGcStats = GcStats.ReadFinal(); + // GC collect before measuring allocations. + ForceGcCollect(); + GcStats gcStats; + using (FinalizerBlocker.MaybeStart()) + { + gcStats = MeasureWithGc(data.InvokeCount / data.UnrollFactor); + } + + exceptionsStats.Stop(); // this method might (de)allocate var finalThreadingStats = ThreadingStats.ReadFinal(); IterationCleanupAction(); // we run iteration cleanup after collecting GC stats var totalOperationsCount = data.InvokeCount * OperationsPerInvoke; - GcStats gcStats = (finalGcStats - initialGcStats).WithTotalOperations(totalOperationsCount); - ThreadingStats threadingStats = (finalThreadingStats - initialThreadingStats).WithTotalOperations(data.InvokeCount * OperationsPerInvoke); + return (gcStats.WithTotalOperations(totalOperationsCount), + (finalThreadingStats - initialThreadingStats).WithTotalOperations(totalOperationsCount), + exceptionsStats.ExceptionsCount / (double)totalOperationsCount); + } - return (gcStats, threadingStats, exceptionsStats.ExceptionsCount / (double)totalOperationsCount); + // Isolate the allocation measurement and skip tier0 jit to make sure we don't get any unexpected allocations. + [MethodImpl(MethodImplOptions.NoInlining | CodeGenHelper.AggressiveOptimizationOption)] + private GcStats MeasureWithGc(long invokeCount) + { + var initialGcStats = GcStats.ReadInitial(); + WorkloadAction(invokeCount); + var finalGcStats = GcStats.ReadFinal(); + return finalGcStats - initialGcStats; } private void RandomizeManagedHeapMemory() @@ -267,7 +293,7 @@ private void GcCollect() ForceGcCollect(); } - private static void ForceGcCollect() + internal static void ForceGcCollect() { GC.Collect(); GC.WaitForPendingFinalizers(); @@ -278,15 +304,6 @@ private static void ForceGcCollect() public void WriteLine() => Host.WriteLine(); - private static void EnableMonitoring() - { - if (RuntimeInformation.IsOldMono) // Monitoring is not available in Mono, see http://stackoverflow.com/questions/40234948/how-to-get-the-number-of-allocated-bytes-in-mono - return; - - if (RuntimeInformation.IsFullFramework) - AppDomain.MonitoringIsEnabled = true; - } - [UsedImplicitly] public static class Signals { @@ -309,5 +326,56 @@ private static readonly Dictionary MessagesToSignals public static bool TryGetSignal(string message, out HostSignal signal) => MessagesToSignals.TryGetValue(message, out signal); } + + // Very long key and value so this shouldn't be used outside of unit tests. + internal const string UnitTestBlockFinalizerEnvKey = "BENCHMARKDOTNET_UNITTEST_BLOCK_FINALIZER_FOR_MEMORYDIAGNOSER"; + internal const string UnitTestBlockFinalizerEnvValue = UnitTestBlockFinalizerEnvKey + "_ACTIVE"; + + // To prevent finalizers interfering with allocation measurements for unit tests, + // we block the finalizer thread until we've completed the measurement. + // https://github.com/dotnet/runtime/issues/101536#issuecomment-2077647417 + private readonly struct FinalizerBlocker : IDisposable + { + private readonly ManualResetEventSlim hangEvent; + + private FinalizerBlocker(ManualResetEventSlim hangEvent) => this.hangEvent = hangEvent; + + private sealed class Impl + { + private readonly ManualResetEventSlim hangEvent = new (false); + private readonly ManualResetEventSlim enteredFinalizerEvent = new (false); + + ~Impl() + { + enteredFinalizerEvent.Set(); + hangEvent.Wait(); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + internal static (ManualResetEventSlim hangEvent, ManualResetEventSlim enteredFinalizerEvent) CreateWeakly() + { + var impl = new Impl(); + return (impl.hangEvent, impl.enteredFinalizerEvent); + } + } + + internal static FinalizerBlocker MaybeStart() + { + if (Environment.GetEnvironmentVariable(UnitTestBlockFinalizerEnvKey) != UnitTestBlockFinalizerEnvValue) + { + return default; + } + var (hangEvent, enteredFinalizerEvent) = Impl.CreateWeakly(); + do + { + GC.Collect(); + // Do NOT call GC.WaitForPendingFinalizers. + } + while (!enteredFinalizerEvent.IsSet); + return new FinalizerBlocker(hangEvent); + } + + public void Dispose() => hangEvent?.Set(); + } } } \ No newline at end of file diff --git a/src/BenchmarkDotNet/Engines/GcStats.cs b/src/BenchmarkDotNet/Engines/GcStats.cs index 9c4c639737..20dd05188a 100644 --- a/src/BenchmarkDotNet/Engines/GcStats.cs +++ b/src/BenchmarkDotNet/Engines/GcStats.cs @@ -1,5 +1,6 @@ using System; using System.Reflection; +using System.Runtime.CompilerServices; using BenchmarkDotNet.Columns; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Portability; @@ -106,9 +107,10 @@ public int GetCollectionsCount(int generation) return AllocatedBytes <= AllocationQuantum ? 0L : AllocatedBytes; } + // Skip tier0 jit to make sure we don't get any unexpected allocations in this method. + [MethodImpl(CodeGenHelper.AggressiveOptimizationOption)] public static GcStats ReadInitial() { - // this will force GC.Collect, so we want to do this before collecting collections counts long? allocatedBytes = GetAllocatedBytes(); return new GcStats( @@ -119,15 +121,14 @@ public static GcStats ReadInitial() 0); } + // Skip tier0 jit to make sure we don't get any unexpected allocations in this method. + [MethodImpl(CodeGenHelper.AggressiveOptimizationOption)] public static GcStats ReadFinal() { return new GcStats( GC.CollectionCount(0), GC.CollectionCount(1), GC.CollectionCount(2), - - // this will force GC.Collect, so we want to do this after collecting collections counts - // to exclude this single full forced collection from results GetAllocatedBytes(), 0); } @@ -136,17 +137,16 @@ public static GcStats ReadFinal() public static GcStats FromForced(int forcedFullGarbageCollections) => new GcStats(forcedFullGarbageCollections, forcedFullGarbageCollections, forcedFullGarbageCollections, 0, 0); + // Skip tier0 jit to make sure we don't get any unexpected allocations in this method. + [MethodImpl(CodeGenHelper.AggressiveOptimizationOption)] private static long? GetAllocatedBytes() { // we have no tests for WASM and don't want to risk introducing a new bug (https://github.com/dotnet/BenchmarkDotNet/issues/2226) if (RuntimeInformation.IsWasm) return null; - // "This instance Int64 property returns the number of bytes that have been allocated by a specific - // AppDomain. The number is accurate as of the last garbage collection." - CLR via C# - // so we enforce GC.Collect here just to make sure we get accurate results - GC.Collect(); - + // Do NOT call GC.Collect() here, as it causes finalizers to run and possibly allocate. https://github.com/dotnet/runtime/issues/101536#issuecomment-2077533242 + // Instead, we call it before we start the measurement in the Engine. #if NET6_0_OR_GREATER return GC.GetTotalAllocatedBytes(precise: true); #else @@ -218,9 +218,7 @@ private static long CalculateAllocationQuantumSize() break; } - GC.Collect(); - GC.WaitForPendingFinalizers(); - GC.Collect(); + Engine.ForceGcCollect(); result = GC.GetTotalMemory(false); var tmp = new object(); diff --git a/src/BenchmarkDotNet/Portability/RuntimeInformation.cs b/src/BenchmarkDotNet/Portability/RuntimeInformation.cs index ed4d457caa..6e56d59d05 100644 --- a/src/BenchmarkDotNet/Portability/RuntimeInformation.cs +++ b/src/BenchmarkDotNet/Portability/RuntimeInformation.cs @@ -29,47 +29,47 @@ internal static class RuntimeInformation internal const string ReleaseConfigurationName = "RELEASE"; internal const string Unknown = "?"; + // Many of these checks allocate and/or are expensive to compute. We store the results in static readonly fields to keep Engine non-allocating. + // Static readonly fields are used instead of properties to avoid an extra getter method call that might not be tier1 jitted. + // This class is internal, so we don't need to expose these as properties. + /// /// returns true for both the old (implementation of .NET Framework) and new Mono (.NET 6+ flavour) /// - public static bool IsMono { get; } = - Type.GetType("Mono.RuntimeStructs") != null; // it allocates a lot of memory, we need to check it once in order to keep Engine non-allocating! + public static readonly bool IsMono = Type.GetType("Mono.RuntimeStructs") != null; - public static bool IsOldMono { get; } = Type.GetType("Mono.Runtime") != null; + public static readonly bool IsOldMono = Type.GetType("Mono.Runtime") != null; - public static bool IsNewMono { get; } = IsMono && !IsOldMono; + public static readonly bool IsNewMono = IsMono && !IsOldMono; - public static bool IsFullFramework => + public static readonly bool IsFullFramework = #if NET6_0_OR_GREATER + // This could be const, but we want to avoid unreachable code warnings. false; #else FrameworkDescription.StartsWith(".NET Framework", StringComparison.OrdinalIgnoreCase); #endif - [PublicAPI] - public static bool IsNetNative => FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase); + public static readonly bool IsNetNative = FrameworkDescription.StartsWith(".NET Native", StringComparison.OrdinalIgnoreCase); - public static bool IsNetCore - => ((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase)) - && !string.IsNullOrEmpty(typeof(object).Assembly.Location); + public static readonly bool IsNetCore = + ((Environment.Version.Major >= 5) || FrameworkDescription.StartsWith(".NET Core", StringComparison.OrdinalIgnoreCase)) + && !string.IsNullOrEmpty(typeof(object).Assembly.Location); - public static bool IsNativeAOT - => Environment.Version.Major >= 5 - && string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null - && !IsWasm; // Wasm also returns "" for assembly locations + public static readonly bool IsNativeAOT = + Environment.Version.Major >= 5 + && string.IsNullOrEmpty(typeof(object).Assembly.Location) // it's merged to a single .exe and .Location returns null + && !IsWasm; // Wasm also returns "" for assembly locations #if NET6_0_OR_GREATER [System.Runtime.Versioning.SupportedOSPlatformGuard("browser")] -#endif - public static bool IsWasm => -#if NET6_0_OR_GREATER - OperatingSystem.IsBrowser(); + public static readonly bool IsWasm = OperatingSystem.IsBrowser(); #else - IsOSPlatform(OSPlatform.Create("BROWSER")); + public static readonly bool IsWasm = IsOSPlatform(OSPlatform.Create("BROWSER")); #endif #if NETSTANDARD2_0 - public static bool IsAot { get; } = IsAotMethod(); // This allocates, so we only want to call it once statically. + public static readonly bool IsAot = IsAotMethod(); private static bool IsAotMethod() { @@ -87,11 +87,22 @@ private static bool IsAotMethod() return false; } #else - public static bool IsAot => !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled; + public static readonly bool IsAot = !System.Runtime.CompilerServices.RuntimeFeature.IsDynamicCodeCompiled; #endif - public static bool IsRunningInContainer => string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true"); - + public static readonly bool IsTieredJitEnabled = + IsNetCore + && (Environment.Version.Major < 3 + // Disabled by default in netcoreapp2.X, check if it's enabled. + ? Environment.GetEnvironmentVariable("COMPlus_TieredCompilation") == "1" + || Environment.GetEnvironmentVariable("DOTNET_TieredCompilation") == "1" + || (AppContext.TryGetSwitch("System.Runtime.TieredCompilation", out bool isEnabled) && isEnabled) + // Enabled by default in netcoreapp3.0+, check if it's disabled. + : Environment.GetEnvironmentVariable("COMPlus_TieredCompilation") != "0" + && Environment.GetEnvironmentVariable("DOTNET_TieredCompilation") != "0" + && (!AppContext.TryGetSwitch("System.Runtime.TieredCompilation", out isEnabled) || isEnabled)); + + public static readonly bool IsRunningInContainer = string.Equals(Environment.GetEnvironmentVariable("DOTNET_RUNNING_IN_CONTAINER"), "true"); internal static string GetArchitecture() => GetCurrentPlatform().ToString(); diff --git a/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj b/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj index a928af1424..93cc6a8ab7 100644 --- a/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj +++ b/tests/BenchmarkDotNet.IntegrationTests/BenchmarkDotNet.IntegrationTests.csproj @@ -18,6 +18,8 @@ Always + + diff --git a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs index 603926c2a6..279562058f 100755 --- a/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs +++ b/tests/BenchmarkDotNet.IntegrationTests/MemoryDiagnoserTests.cs @@ -36,7 +36,11 @@ public class MemoryDiagnoserTests public static IEnumerable GetToolchains() { yield return new object[] { Job.Default.GetToolchain() }; - yield return new object[] { InProcessEmitToolchain.Instance }; + // InProcessEmit reports flaky allocations in current .Net 8. + if (!RuntimeInformation.IsNetCore) + { + yield return new object[] { InProcessEmitToolchain.Instance }; + } } public class AccurateAllocations @@ -67,7 +71,7 @@ public void MemoryDiagnoserIsAccurate(IToolchain toolchain) }); } - [FactEnvSpecific("We don't want to test NativeAOT twice (for .NET Framework 4.6.2 and .NET 7.0)", EnvRequirement.DotNetCoreOnly)] + [FactEnvSpecific("We don't want to test NativeAOT twice (for .NET Framework 4.6.2 and .NET 8.0)", EnvRequirement.DotNetCoreOnly)] public void MemoryDiagnoserSupportsNativeAOT() { if (OsDetector.IsMacOS()) @@ -105,7 +109,7 @@ private void AllocateUntilGcWakesUp() } } - [Theory(Skip = "#1542 Tiered JIT Thread allocates memory in the background"), MemberData(nameof(GetToolchains))] + [Theory, MemberData(nameof(GetToolchains))] [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] public void MemoryDiagnoserDoesNotIncludeAllocationsFromSetupAndCleanup(IToolchain toolchain) { @@ -118,23 +122,41 @@ public void MemoryDiagnoserDoesNotIncludeAllocationsFromSetupAndCleanup(IToolcha public class NoAllocationsAtAll { [Benchmark] public void EmptyMethod() { } + + [Benchmark] + public ulong TimeConsuming() + { + var r = 1ul; + for (var i = 0; i < 50_000_000; i++) + { + r /= 1; + } + return r; + } } [Theory, MemberData(nameof(GetToolchains))] [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] public void EngineShouldNotInterfereAllocationResults(IToolchain toolchain) { - if (RuntimeInformation.IsFullFramework && toolchain.IsInProcess) - { - return; // this test is flaky on Full Framework - } - AssertAllocations(toolchain, typeof(NoAllocationsAtAll), new Dictionary { { nameof(NoAllocationsAtAll.EmptyMethod), 0 } }); } + // #1542 + [Theory, MemberData(nameof(GetToolchains))] + [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] + public void TieredJitShouldNotInterfereAllocationResults(IToolchain toolchain) + { + AssertAllocations(toolchain, typeof(NoAllocationsAtAll), new Dictionary + { + { nameof(NoAllocationsAtAll.TimeConsuming), 0 } + }, + disableTieredJit: false, iterationCount: 10); // 1 iteration is not enough to repro the problem + } + public class NoBoxing { [Benchmark] public ValueTuple ReturnsValueType() => new ValueTuple(0); @@ -165,11 +187,6 @@ public class NonAllocatingAsynchronousBenchmarks [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] public void AwaitingTasksShouldNotInterfereAllocationResults(IToolchain toolchain) { - if (toolchain.IsInProcess) - { - return; // it's flaky: https://github.com/dotnet/BenchmarkDotNet/issues/1925 - } - AssertAllocations(toolchain, typeof(NonAllocatingAsynchronousBenchmarks), new Dictionary { { nameof(NonAllocatingAsynchronousBenchmarks.CompletedTask), 0 }, @@ -217,8 +234,9 @@ public byte[] SixtyFourBytesArray() } } - [Theory(Skip = "#1542 Tiered JIT Thread allocates memory in the background"), MemberData(nameof(GetToolchains))] - //[TheoryNetCoreOnly("Only .NET Core 2.0+ API is bug free for this case"), MemberData(nameof(GetToolchains))] + [TheoryEnvSpecific("Full Framework cannot measure precisely enough for low invocation counts.", EnvRequirement.DotNetCoreOnly), MemberData(nameof(GetToolchains), + Skip = "Some random background allocations are occurring that we haven't been able to figure out, causing this test in particular to be flaky." + + " Other tests likely also suffer from it, but their high invocation counts successfully drown it out. #2562")] [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] public void AllocationQuantumIsNotAnIssueForNetCore21Plus(IToolchain toolchain) { @@ -233,31 +251,57 @@ public void AllocationQuantumIsNotAnIssueForNetCore21Plus(IToolchain toolchain) public class MultiThreadedAllocation { - public const int Size = 1_000_000; + public const int Size = 1024; public const int ThreadsCount = 10; + // We cache the threads in GlobalSetup and reuse them for each benchmark invocation + // to avoid measuring the cost of thread start and join, which varies across different runtimes. private Thread[] threads; + private volatile bool keepRunning = true; + private readonly Barrier barrier = new (ThreadsCount + 1); + private readonly CountdownEvent countdownEvent = new (ThreadsCount); - [IterationSetup] - public void SetupIteration() + [GlobalSetup] + public void Setup() { threads = Enumerable.Range(0, ThreadsCount) - .Select(_ => new Thread(() => GC.KeepAlive(new byte[Size]))) + .Select(_ => new Thread(() => + { + while (keepRunning) + { + barrier.SignalAndWait(); + GC.KeepAlive(new byte[Size]); + countdownEvent.Signal(); + } + })) .ToArray(); + foreach (var thread in threads) + { + thread.Start(); + } } - [Benchmark] - public void Allocate() + [GlobalCleanup] + public void Cleanup() { + keepRunning = false; + barrier.SignalAndWait(); foreach (var thread in threads) { - thread.Start(); thread.Join(); } } + + [Benchmark] + public void Allocate() + { + countdownEvent.Reset(ThreadsCount); + barrier.SignalAndWait(); + countdownEvent.Wait(); + } } - [Theory(Skip = "Test is flaky even in latest .Net")] + [TheoryEnvSpecific("Full Framework cannot measure precisely enough", EnvRequirement.DotNetCoreOnly)] [MemberData(nameof(GetToolchains))] [Trait(Constants.Category, Constants.BackwardCompatibilityCategory)] public void MemoryDiagnoserIsAccurateForMultiThreadedBenchmarks(IToolchain toolchain) @@ -265,18 +309,16 @@ public void MemoryDiagnoserIsAccurateForMultiThreadedBenchmarks(IToolchain toolc long objectAllocationOverhead = IntPtr.Size * 2; // pointer to method table + object header word long arraySizeOverhead = IntPtr.Size; // array length long memoryAllocatedPerArray = (MultiThreadedAllocation.Size + objectAllocationOverhead + arraySizeOverhead); - long threadStartAndJoinOverhead = 112; // this is more or less a magic number taken from memory profiler - long allocatedMemoryPerThread = memoryAllocatedPerArray + threadStartAndJoinOverhead; AssertAllocations(toolchain, typeof(MultiThreadedAllocation), new Dictionary { - { nameof(MultiThreadedAllocation.Allocate), allocatedMemoryPerThread * MultiThreadedAllocation.ThreadsCount } + { nameof(MultiThreadedAllocation.Allocate), memoryAllocatedPerArray * MultiThreadedAllocation.ThreadsCount } }); } - private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Dictionary benchmarksAllocationsValidators) + private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Dictionary benchmarksAllocationsValidators, bool disableTieredJit = true, int iterationCount = 1) { - var config = CreateConfig(toolchain); + var config = CreateConfig(toolchain, disableTieredJit, iterationCount); var benchmarks = BenchmarkConverter.TypeToBenchmarks(benchmarkType, config); var summary = BenchmarkRunner.Run(benchmarks); @@ -312,24 +354,35 @@ private void AssertAllocations(IToolchain toolchain, Type benchmarkType, Diction } } - private IConfig CreateConfig(IToolchain toolchain) - => ManualConfig.CreateEmpty() - .AddJob(Job.ShortRun - .WithEvaluateOverhead(false) // no need to run idle for this test - .WithWarmupCount(0) // don't run warmup to save some time for our CI runs - .WithIterationCount(1) // single iteration is enough for us - .WithGcForce(false) - .WithGcServer(false) - .WithGcConcurrent(false) - .WithEnvironmentVariables([ - // Tiered JIT can allocate some memory on a background thread, let's disable it to make our tests less flaky (#1542) + private IConfig CreateConfig(IToolchain toolchain, + // Tiered JIT can allocate some memory on a background thread, let's disable it by default to make our tests less flaky (#1542). + // This was mostly fixed in net7.0, but tiered jit thread is not guaranteed to not allocate, so we disable it just in case. + bool disableTieredJit = true, + // Single iteration is enough for most of the tests. + int iterationCount = 1) + { + var job = Job.ShortRun + .WithEvaluateOverhead(false) // no need to run idle for this test + .WithWarmupCount(0) // don't run warmup to save some time for our CI runs + .WithIterationCount(iterationCount) + .WithGcForce(false) + .WithGcServer(false) + .WithGcConcurrent(false) + // To prevent finalizers allocating out of our control, we hang the finalizer thread. + // https://github.com/dotnet/runtime/issues/101536#issuecomment-2077647417 + .WithEnvironmentVariable(Engines.Engine.UnitTestBlockFinalizerEnvKey, Engines.Engine.UnitTestBlockFinalizerEnvValue) + .WithToolchain(toolchain); + return ManualConfig.CreateEmpty() + .AddJob(disableTieredJit + ? job.WithEnvironmentVariables( new EnvironmentVariable("DOTNET_TieredCompilation", "0"), new EnvironmentVariable("COMPlus_TieredCompilation", "0") - ]) - .WithToolchain(toolchain)) + ) + : job) .AddColumnProvider(DefaultColumnProviders.Instance) .AddDiagnoser(MemoryDiagnoser.Default) .AddLogger(toolchain.IsInProcess ? ConsoleLogger.Default : new OutputLogger(output)); // we can't use OutputLogger for the InProcess toolchains because it allocates memory on the same thread + } // note: don't copy, never use in production systems (it should work but I am not 100% sure) private int CalculateRequiredSpace()