Skip to content

Commit

Permalink
ChaoticCluster playground + instrumentation
Browse files Browse the repository at this point in the history
  • Loading branch information
ReubenBond committed Oct 8, 2024
1 parent 26fab60 commit 054910d
Show file tree
Hide file tree
Showing 16 changed files with 368 additions and 103 deletions.
7 changes: 7 additions & 0 deletions Directory.Packages.props
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,13 @@
<PackageVersion Include="Aspire.Hosting.Orleans" Version="8.2.0" />
<PackageVersion Include="Aspire.Hosting.Redis" Version="8.2.0" />
<PackageVersion Include="Aspire.StackExchange.Redis" Version="8.2.0" />
<PackageVersion Include="Microsoft.Extensions.Http.Resilience" Version="8.8.0" />
<PackageVersion Include="Microsoft.Extensions.ServiceDiscovery" Version="8.2.0" />
<PackageVersion Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" Version="1.9.0" />
<PackageVersion Include="OpenTelemetry.Extensions.Hosting" Version="1.9.0" />
<PackageVersion Include="OpenTelemetry.Instrumentation.AspNetCore" Version="1.9.0" />
<PackageVersion Include="OpenTelemetry.Instrumentation.Http" Version="1.9.0" />
<PackageVersion Include="OpenTelemetry.Instrumentation.Runtime" Version="1.9.0" />
<!-- 3rd party packages -->
<PackageVersion Include="Google.Cloud.PubSub.V1" Version="1.0.0-beta13" />
<PackageVersion Include="AWSSDK.DynamoDBv2" Version="3.7.300.6" />
Expand Down
7 changes: 7 additions & 0 deletions Orleans.sln
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.AppHost", "p
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.Silo", "playground\ChaoticCluster\ChaoticCluster.Silo\ChaoticCluster.Silo.csproj", "{76A549FA-69F1-4967-82B6-161A8B52C86B}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.ServiceDefaults", "playground\ChaoticCluster\ChaoticCluster.ServiceDefaults\ChaoticCluster.ServiceDefaults.csproj", "{4004A79F-B6BB-4472-891B-AD1348AE3E93}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestSerializerExternalModels", "test\Misc\TestSerializerExternalModels\TestSerializerExternalModels.csproj", "{5D587DDE-036D-4694-A314-8DDF270AC031}"
EndProject
Global
Expand Down Expand Up @@ -648,6 +650,10 @@ Global
{76A549FA-69F1-4967-82B6-161A8B52C86B}.Debug|Any CPU.Build.0 = Debug|Any CPU
{76A549FA-69F1-4967-82B6-161A8B52C86B}.Release|Any CPU.ActiveCfg = Release|Any CPU
{76A549FA-69F1-4967-82B6-161A8B52C86B}.Release|Any CPU.Build.0 = Release|Any CPU
{4004A79F-B6BB-4472-891B-AD1348AE3E93}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{4004A79F-B6BB-4472-891B-AD1348AE3E93}.Debug|Any CPU.Build.0 = Debug|Any CPU
{4004A79F-B6BB-4472-891B-AD1348AE3E93}.Release|Any CPU.ActiveCfg = Release|Any CPU
{4004A79F-B6BB-4472-891B-AD1348AE3E93}.Release|Any CPU.Build.0 = Release|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Release|Any CPU.ActiveCfg = Release|Any CPU
Expand Down Expand Up @@ -771,6 +777,7 @@ Global
{2579A7F6-EBE8-485A-BB20-A5D19DB5612B} = {A41DE3D1-F8AA-4234-BE6F-3C9646A1507A}
{4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
{76A549FA-69F1-4967-82B6-161A8B52C86B} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
{4004A79F-B6BB-4472-891B-AD1348AE3E93} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
{5D587DDE-036D-4694-A314-8DDF270AC031} = {70BCC54E-1618-4742-A079-07588065E361}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Shared "service defaults" library for the ChaoticCluster playground. -->
<!-- NOTE(review): the file name is not shown in this view; presumably ChaoticCluster.ServiceDefaults.csproj. Confirm. -->

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- NOTE(review): presumably marks this as a shared Aspire project rather than a runnable service. Confirm against the Aspire docs. -->
<IsAspireSharedProject>true</IsAspireSharedProject>
</PropertyGroup>

<ItemGroup>
<!-- Extensions.cs uses ASP.NET Core types (WebApplication, health check endpoints). -->
<FrameworkReference Include="Microsoft.AspNetCore.App" />

<!-- No Version attributes here: the versions are declared as PackageVersion items in Directory.Packages.props. -->
<PackageReference Include="Microsoft.Extensions.Http.Resilience" />
<PackageReference Include="Microsoft.Extensions.ServiceDiscovery" />
<PackageReference Include="OpenTelemetry.Exporter.OpenTelemetryProtocol" />
<PackageReference Include="OpenTelemetry.Extensions.Hosting" />
<PackageReference Include="OpenTelemetry.Instrumentation.AspNetCore" />
<PackageReference Include="OpenTelemetry.Instrumentation.Http" />
<PackageReference Include="OpenTelemetry.Instrumentation.Runtime" />
</ItemGroup>

</Project>
111 changes: 111 additions & 0 deletions playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/Extensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using Microsoft.AspNetCore.Builder;
using Microsoft.AspNetCore.Diagnostics.HealthChecks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Diagnostics.HealthChecks;
using Microsoft.Extensions.Logging;
using OpenTelemetry;
using OpenTelemetry.Metrics;
using OpenTelemetry.Trace;

namespace Microsoft.Extensions.Hosting;
// Adds common .NET Aspire services: service discovery, resilience, health checks, and OpenTelemetry.
// This project should be referenced by each service project in your solution.
// To learn more about using this project, see https://aka.ms/dotnet/aspire/service-defaults
/// <summary>
/// Host-builder extension methods that wire up OpenTelemetry, health checks, service discovery,
/// and HTTP client resilience for the services that reference this project.
/// </summary>
public static class Extensions
{
    /// <summary>
    /// Applies the full set of defaults: OpenTelemetry, the default health checks, service discovery,
    /// and HTTP client defaults (standard resilience handler plus service discovery).
    /// </summary>
    /// <param name="builder">The host application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
    public static IHostApplicationBuilder AddServiceDefaults(this IHostApplicationBuilder builder)
    {
        builder.ConfigureOpenTelemetry();
        builder.AddDefaultHealthChecks();

        var services = builder.Services;
        services.AddServiceDiscovery();
        services.ConfigureHttpClientDefaults(httpClientBuilder =>
        {
            // Resilience is enabled for every HttpClient by default.
            httpClientBuilder.AddStandardResilienceHandler();

            // Service discovery is enabled for every HttpClient by default.
            httpClientBuilder.AddServiceDiscovery();
        });

        // Uncomment the following to restrict the allowed schemes for service discovery.
        // builder.Services.Configure<ServiceDiscoveryOptions>(options =>
        // {
        //     options.AllowedSchemes = ["https"];
        // });

        return builder;
    }

    /// <summary>
    /// Routes logging through OpenTelemetry (formatted messages and scopes included), registers the
    /// ASP.NET Core, HttpClient and runtime metric instrumentation together with the
    /// "System.Runtime" and "Microsoft.Orleans" meters, and then adds the exporters.
    /// </summary>
    /// <param name="builder">The host application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
    public static IHostApplicationBuilder ConfigureOpenTelemetry(this IHostApplicationBuilder builder)
    {
        builder.Logging.AddOpenTelemetry(loggerOptions =>
        {
            loggerOptions.IncludeFormattedMessage = true;
            loggerOptions.IncludeScopes = true;
        });

        var openTelemetry = builder.Services.AddOpenTelemetry();
        openTelemetry.WithMetrics(meters => meters
            .AddAspNetCoreInstrumentation()
            .AddHttpClientInstrumentation()
            .AddRuntimeInstrumentation()
            .AddMeter("System.Runtime")
            .AddMeter("Microsoft.Orleans"));

        builder.AddOpenTelemetryExporters();

        return builder;
    }

    /// <summary>
    /// Enables the OTLP exporter, but only when the "OTEL_EXPORTER_OTLP_ENDPOINT" configuration value
    /// is present and not blank.
    /// </summary>
    /// <param name="builder">The host application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
    private static IHostApplicationBuilder AddOpenTelemetryExporters(this IHostApplicationBuilder builder)
    {
        var otlpEndpoint = builder.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"];

        if (!string.IsNullOrWhiteSpace(otlpEndpoint))
        {
            builder.Services.AddOpenTelemetry().UseOtlpExporter();
        }

        // Uncomment the following lines to enable the Azure Monitor exporter (requires the Azure.Monitor.OpenTelemetry.AspNetCore package)
        //if (!string.IsNullOrEmpty(builder.Configuration["APPLICATIONINSIGHTS_CONNECTION_STRING"]))
        //{
        //    builder.Services.AddOpenTelemetry()
        //       .UseAzureMonitor();
        //}

        return builder;
    }

    /// <summary>
    /// Registers a "self" health check that always reports healthy and is tagged "live".
    /// </summary>
    /// <param name="builder">The host application builder to configure.</param>
    /// <returns>The same <paramref name="builder"/>, so calls can be chained.</returns>
    public static IHostApplicationBuilder AddDefaultHealthChecks(this IHostApplicationBuilder builder)
    {
        var healthChecks = builder.Services.AddHealthChecks();

        // Default liveness check: it only proves the app is responsive.
        healthChecks.AddCheck("self", () => HealthCheckResult.Healthy(), ["live"]);

        return builder;
    }

    /// <summary>
    /// Maps the "/health" and "/alive" health check endpoints when the app runs in the Development
    /// environment; in any other environment no endpoints are mapped.
    /// </summary>
    /// <param name="app">The web application to map the endpoints on.</param>
    /// <returns>The same <paramref name="app"/>, so calls can be chained.</returns>
    public static WebApplication MapDefaultEndpoints(this WebApplication app)
    {
        // Adding health checks endpoints to applications in non-development environments has security implications.
        // See https://aka.ms/dotnet/aspire/healthchecks for details before enabling these endpoints in non-development environments.
        if (!app.Environment.IsDevelopment())
        {
            return app;
        }

        // Readiness: every registered health check must pass.
        app.MapHealthChecks("/health");

        // Liveness: only the checks tagged "live" must pass.
        var livenessOptions = new HealthCheckOptions
        {
            Predicate = registration => registration.Tags.Contains("live")
        };
        app.MapHealthChecks("/alive", livenessOptions);

        return app;
    }
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<OrleansBuildTimeCodeGen>true</OrleansBuildTimeCodeGen>
<ServerGarbageCollection>true</ServerGarbageCollection>
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\..\src\Azure\Orleans.Clustering.AzureStorage\Orleans.Clustering.AzureStorage.csproj" />
<ProjectReference Include="..\..\..\src\Orleans.Server\Orleans.Server.csproj" />
<ProjectReference Include="..\..\..\src\Orleans.TestingHost\Orleans.TestingHost.csproj" />
<ProjectReference Include="..\ChaoticCluster.ServiceDefaults\ChaoticCluster.ServiceDefaults.csproj" />
</ItemGroup>

</Project>
153 changes: 151 additions & 2 deletions playground/ChaoticCluster/ChaoticCluster.Silo/Program.cs
Original file line number Diff line number Diff line change
@@ -1,2 +1,151 @@
// See https://aka.ms/new-console-template for more information
Console.WriteLine("Hello, World!");
using System.Diagnostics;
using ChaoticCluster.Silo;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;
using Orleans.TestingHost;

// Outer host: it exists to set up the service defaults (OpenTelemetry etc.) whose logger
// providers are then shared with every silo host and with the client host below.
var appBuilder = Host.CreateApplicationBuilder(args);
appBuilder.AddServiceDefaults(); // Configure OTel
using var app = appBuilder.Build();
await app.StartAsync();

// Copies every ILoggerProvider registered in the outer host into the given logging builder.
void ForwardLoggerProviders(ILoggingBuilder logging)
{
    foreach (var loggerProvider in app.Services.GetServices<ILoggerProvider>())
    {
        logging.AddProvider(loggerProvider);
    }
}

var clusterBuilder = new InProcessTestClusterBuilder(1);
clusterBuilder.ConfigureSilo((_, silo) => new SiloBuilderConfigurator().Configure(silo));
clusterBuilder.ConfigureSiloHost((_, siloHost) => ForwardLoggerProviders(siloHost.Logging));
clusterBuilder.ConfigureClientHost(clientHost => ForwardLoggerProviders(clientHost.Logging));

var testCluster = clusterBuilder.Build();
await testCluster.DeployAsync();

var log = testCluster.Client.ServiceProvider.GetRequiredService<ILogger<Program>>();
log.LogInformation($"ServiceId: {testCluster.Options.ServiceId}");
log.LogInformation($"ClusterId: {testCluster.Options.ClusterId}");

// Overall run time budget: the loops below stop once this token source is cancelled.
CancellationTokenSource cts = new(TimeSpan.FromMinutes(15));
Stopwatch reconfigurationTimer = Stopwatch.StartNew();

// The silo count oscillates between these limits; 'target' is the limit currently being approached.
int upperLimit = 10;
int lowerLimit = 1; // Membership is kept on the primary, so we can't go below 1
int target = upperLimit;

// First grain key of the next batch of calls.
long idBase = 0L;

// Grain calls are issued through the grain factory of the first silo.
var client = testCluster.Silos[0].ServiceProvider.GetRequiredService<IGrainFactory>();
const int CallsPerIteration = 100;
const int MaxGrains = 524_288; // 2**19;

// Load generator: until cancellation, pings a batch of CallsPerIteration grains whose keys start at
// idBase (wrapped modulo MaxGrains), then advances idBase by the batch size.
var loadTask = Task.Run(async () =>
{
    while (!cts.IsCancellationRequested)
    {
        // Started per iteration; its value is not read anywhere in this block.
        var iterationTimer = Stopwatch.StartNew();

        var pings = new List<Task>(CallsPerIteration);
        for (var offset = 0; offset < CallsPerIteration; offset++)
        {
            var grainKey = (idBase + offset) % MaxGrains;
            pings.Add(client.GetGrain<IMyTestGrain>(grainKey).Ping().AsTask());
        }

        var batch = Task.WhenAll(pings);

        using var watchdogCancellation = new CancellationTokenSource();
        var watchdogPeriod = TimeSpan.FromMilliseconds(90_000);
        var watchdog = Task.Delay(watchdogPeriod, watchdogCancellation.Token);

        // The winner of this race is not inspected: the batch is awaited below either way.
        await Task.WhenAny(batch, watchdog);

        try
        {
            await batch;
        }
        catch (SiloUnavailableException unavailable)
        {
            log.LogInformation(unavailable, "Swallowed transient exception.");
        }
        catch (OrleansMessageRejectionException rejection)
        {
            log.LogInformation(rejection, "Swallowed rejection.");
        }
        catch (Exception unexpected)
        {
            // Anything else is fatal for the load loop: log it and let the task fault.
            log.LogError(unexpected, "Unhandled exception.");
            throw;
        }

        // Release the pending delay now that the batch is finished.
        watchdogCancellation.Cancel();
        idBase += CallsPerIteration;
    }
});

// Chaos loop: roughly every 10 seconds (measured by reconfigurationTimer) it moves the silo count one
// step towards 'target' by stopping, killing, or starting a silo. When the count reaches a limit,
// 'target' flips to the opposite limit, so the cluster keeps growing and shrinking until cancellation.
// Exceptions raised inside the loop are logged and ignored.
var chaosTask = Task.Run(async () =>
{
    var clusterOperation = Task.CompletedTask;
    while (!cts.IsCancellationRequested)
    {
        try
        {
            var remaining = TimeSpan.FromSeconds(10) - reconfigurationTimer.Elapsed;
            if (remaining <= TimeSpan.Zero)
            {
                reconfigurationTimer.Restart();

                // Wait for the previous operation first, so at most one membership change is in
                // flight and its failure (if any) reaches the catch block below.
                await clusterOperation;
                clusterOperation = Task.Run(async () =>
                {
                    // Read the silo list once so that the count and the index refer to the same list.
                    var silos = testCluster.Silos;
                    var currentCount = silos.Count;
                    if (currentCount > target)
                    {
                        // Stop or kill a random silo, but not the primary (since that hosts cluster membership).
                        // Random.Next's upper bound is exclusive, so passing currentCount makes every
                        // non-primary silo (indices 1 .. currentCount - 1) eligible, including the last one.
                        // currentCount > target >= 1 here, so at least one non-primary silo exists.
                        var victim = silos[Random.Shared.Next(1, currentCount)];
                        if (currentCount % 2 == 0)
                        {
                            log.LogInformation($"Stopping '{victim.SiloAddress}'.");
                            await testCluster.StopSiloAsync(victim);
                            log.LogInformation($"Stopped '{victim.SiloAddress}'.");
                        }
                        else
                        {
                            log.LogInformation($"Killing '{victim.SiloAddress}'.");
                            await testCluster.KillSiloAsync(victim);
                            log.LogInformation($"Killed '{victim.SiloAddress}'.");
                        }
                    }
                    else if (currentCount < target)
                    {
                        log.LogInformation("Starting new silo.");
                        var result = await testCluster.StartAdditionalSiloAsync();
                        log.LogInformation($"Started '{result.SiloAddress}'.");
                    }

                    // Reverse direction once a limit is reached (based on the count seen before this step).
                    if (currentCount <= lowerLimit)
                    {
                        target = upperLimit;
                    }
                    else if (currentCount >= upperLimit)
                    {
                        target = lowerLimit;
                    }
                });
            }
            else
            {
                // Not due yet: sleep for the rest of the 10 second interval.
                await Task.Delay(remaining);
            }
        }
        catch (Exception exception)
        {
            log.LogInformation(exception, "Ignoring chaos exception.");
        }
    }
});

// Run until either loop finishes; awaiting the finished task rethrows its exception if it faulted.
var firstFinished = await Task.WhenAny(loadTask, chaosTask);
await firstFinished;

// Ask the other loop to stop, then wait for both before tearing the cluster down.
cts.Cancel();
await Task.WhenAll(loadTask, chaosTask);
await testCluster.StopAllSilosAsync();
await testCluster.DisposeAsync();

// The outer host is stopped last.
await app.StopAsync();
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
using Microsoft.Extensions.DependencyInjection;
using Orleans.Configuration;
using Orleans.TestingHost;

namespace ChaoticCluster.Silo;

/// <summary>
/// Silo configurator for the playground cluster: enables the distributed grain directory on each silo
/// it is applied to.
/// </summary>
internal class SiloBuilderConfigurator : ISiloConfigurator
{
    /// <summary>
    /// Adds the distributed grain directory to <paramref name="siloBuilder"/>.
    /// </summary>
    /// <param name="siloBuilder">The silo builder to configure.</param>
    public void Configure(ISiloBuilder siloBuilder)
    {
        // ORLEANSEXP002 flags the API as evaluation-only; it is suppressed just for this call.
#pragma warning disable ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates.
        siloBuilder.AddDistributedGrainDirectory();
#pragma warning restore ORLEANSEXP002
    }
}

/// <summary>
/// Integer-keyed grain contract with a single <see cref="Ping"/> call that carries no arguments
/// and returns no value.
/// </summary>
internal interface IMyTestGrain : IGrainWithIntegerKey
{
/// <summary>Performs a call to the grain that carries no payload.</summary>
/// <returns>A <see cref="ValueTask"/> that completes when the call has completed.</returns>
ValueTask Ping();
}

/// <summary>
/// Trivial <see cref="IMyTestGrain"/> implementation whose <see cref="Ping"/> completes immediately.
/// </summary>
// NOTE(review): the age limit of 1.01 minutes presumably makes idle activations eligible for
// collection shortly after one minute. Confirm against the Orleans activation collection docs.
[CollectionAgeLimit(Minutes = 1.01)]
internal class MyTestGrain : Grain, IMyTestGrain
{
    /// <summary>Does no work and returns an already-completed <see cref="ValueTask"/>.</summary>
    /// <returns>A completed <see cref="ValueTask"/>.</returns>
    public ValueTask Ping()
    {
        return ValueTask.CompletedTask;
    }
}
Loading

0 comments on commit 054910d

Please sign in to comment.