Skip to content

Commit

Permalink
DT.AzureStorage: Adding provision to perform graceful shutdown before…
Browse files Browse the repository at this point in the history
… process kill due to max timeout hits (#536)
  • Loading branch information
RamjotSingh authored May 4, 2021
1 parent 75b5bf3 commit bd61869
Show file tree
Hide file tree
Showing 3 changed files with 185 additions and 3 deletions.
138 changes: 138 additions & 0 deletions Test/DurableTask.AzureStorage.Tests/TimeoutHandlerTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;
using System.Collections.Generic;
using System.Reflection;
using System.Text;
using System.Threading.Tasks;
using Microsoft.VisualStudio.TestTools.UnitTesting;

namespace DurableTask.AzureStorage.Tests
{
/// <summary>
/// Tests for <see cref="TimeoutHandler"/>.
/// </summary>
[TestClass]
public class TimeoutHandlerTests
{
/// <summary>
/// Ensures that process graceful action is executed before process is killed.
/// </summary>
/// <returns>Task tracking operation.</returns>
[TestMethod]
public async Task EnsureTimeoutHandlerRunsProcessShutdownEventsBeforeProcessKill()
{
int executionCount = 0;
int killCount = 0;
int shutdownCount = 0;

Action<string> killAction = (errorString) => killCount++;
typeof(TimeoutHandler)
.GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static)
.SetValue(null, killAction);

// TimeoutHandler at the moment invokes shutdown on 5th call failure.
await TimeoutHandler.ExecuteWithTimeout(
"test",
"account",
new AzureStorageOrchestrationServiceSettings
{
OnImminentFailFast = (errorString) =>
{
shutdownCount++;
return Task.FromResult(true);
}
},
async (operationContext, cancellationToken) =>
{
executionCount++;
await Task.Delay(TimeSpan.FromMinutes(3));
return 1;
});

Assert.AreEqual(5, executionCount);
Assert.AreEqual(1, shutdownCount);
Assert.AreEqual(1, killCount);
}

/// <summary>
/// Ensures that process graceful action is executed and failfast is skipped.
/// </summary>
/// <returns>Task tracking operation.</returns>
[TestMethod]
[ExpectedException(typeof(TimeoutException))]
public async Task EnsureTimeoutHandlerRunsProcessShutdownEventsAndSkipsProcessKill()
{
int executionCount = 0;
int killCount = 0;
int shutdownCount = 0;

Action<string> killAction = (errorString) => killCount++;
typeof(TimeoutHandler)
.GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static)
.SetValue(null, killAction);

// TimeoutHandler at the moment invokes shutdown on 5th call failure.
await TimeoutHandler.ExecuteWithTimeout(
"test",
"account",
new AzureStorageOrchestrationServiceSettings
{
OnImminentFailFast = (errorString) =>
{
shutdownCount++;
return Task.FromResult(false);
}
},
async (operationContext, cancellationToken) =>
{
executionCount++;
await Task.Delay(TimeSpan.FromMinutes(3));
return 1;
});

Assert.AreEqual(5, executionCount);
Assert.AreEqual(1, shutdownCount);
Assert.AreEqual(0, killCount);
}

/// <summary>
/// Ensures that process graceful action is executed before process is killed.
/// </summary>
/// <returns>Task tracking operation.</returns>
[TestMethod]
public async Task EnsureTimeoutHandlerExecutesProcessKillIfGracefulShutdownFails()
{
int executionCount = 0;
int killCount = 0;
int shutdownCount = 0;

Action<string> killAction = (errorString) => killCount++;
typeof(TimeoutHandler)
.GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static)
.SetValue(null, killAction);

// TimeoutHandler at the moment invokes shutdown on 5th call failure.
await TimeoutHandler.ExecuteWithTimeout(
"test",
"account",
new AzureStorageOrchestrationServiceSettings
{
OnImminentFailFast = (errorString) =>
{
shutdownCount++;

throw new Exception("Breaking graceful shutdown");
}
},
async (operationContext, cancellationToken) =>
{
executionCount++;
await Task.Delay(TimeSpan.FromMinutes(3));
return 1;
});

Assert.AreEqual(5, executionCount);
Assert.AreEqual(1, shutdownCount);
Assert.AreEqual(1, killCount);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ namespace DurableTask.AzureStorage
using Microsoft.WindowsAzure.Storage.Queue;
using Microsoft.WindowsAzure.Storage.Table;
using System.Runtime.Serialization;
using System.Threading.Tasks;

/// <summary>
/// Settings that impact the runtime behavior of the <see cref="AzureStorageOrchestrationService"/>.
Expand Down Expand Up @@ -214,6 +215,17 @@ public class AzureStorageOrchestrationServiceSettings
/// </summary>
public ILoggerFactory LoggerFactory { get; set; } = NoOpLoggerFactory.Instance;

/// <summary>
/// Gets or sets an optional function to be executed before the app is recycled. Reason for shutdown is passed as a string parameter.
/// This can be used to perform any pending cleanup tasks or just do a graceful shutdown.
/// The function returns a <see cref="bool"/>. If 'true' is returned <see cref="Environment.FailFast(string)"/> is executed, if 'false' is returned,
/// process kill is skipped.
/// A wait time of 35 seconds will be given for the task to finish, if the task does not finish in required time, <see cref="Environment.FailFast(string)"/> will be executed.
/// </summary>
/// <remarks>Skipping process kill by returning false might have negative consequences if since Storage SDK might be in deadlock. Ensure if you return
/// false a process shutdown is executed by you.</remarks>
public Func<string, Task<bool>> OnImminentFailFast { get; set; } = (message) => Task.FromResult(true);

/// <summary>
/// Returns bool indicating is the TrackingStoreStorageAccount has been set.
/// </summary>
Expand Down
38 changes: 35 additions & 3 deletions src/DurableTask.AzureStorage/TimeoutHandler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,11 @@ internal static class TimeoutHandler

private static int NumTimeoutsHit = 0;

/// <summary>
/// Process kill action. This is exposed here to allow override from tests.
/// </summary>
private static Action<string> ProcessKillAction = (errorMessage) => Environment.FailFast(errorMessage);

public static async Task<T> ExecuteWithTimeout<T>(
string operationName,
string account,
Expand Down Expand Up @@ -72,10 +77,37 @@ public static async Task<T> ExecuteWithTimeout<T>(

// Delay to ensure the ETW event gets written
await Task.Delay(TimeSpan.FromSeconds(3));
Environment.FailFast(message);

// Should never be hit, due to above FailFast() call.
return default(T);
bool executeFailFast = true;
Task<bool> gracefulShutdownTask = Task.Run(async () =>
{
try
{
return await settings.OnImminentFailFast(message);
}
catch (Exception)
{
return true;
}
});

await Task.WhenAny(gracefulShutdownTask, Task.Delay(TimeSpan.FromSeconds(35)));

if (gracefulShutdownTask.IsCompleted)
{
executeFailFast = gracefulShutdownTask.Result;
}

if (executeFailFast)
{
TimeoutHandler.ProcessKillAction(message);
}
else
{
// Technically we don't need else as the action above would have killed the process.
// However tests don't kill the process so putting in else.
throw new TimeoutException(message);
}
}

}
Expand Down

0 comments on commit bd61869

Please sign in to comment.