From bd6186992fd0d55372d0f0989f1632efd2ff72d1 Mon Sep 17 00:00:00 2001 From: Ramjot Singh Date: Tue, 4 May 2021 13:17:32 -0700 Subject: [PATCH] DT.AzureStorage: Adding provision to perform graceful shutdown before process kill due to max timeout hits (#536) --- .../TimeoutHandlerTests.cs | 138 ++++++++++++++++++ ...zureStorageOrchestrationServiceSettings.cs | 12 ++ .../TimeoutHandler.cs | 38 ++++- 3 files changed, 185 insertions(+), 3 deletions(-) create mode 100644 Test/DurableTask.AzureStorage.Tests/TimeoutHandlerTests.cs diff --git a/Test/DurableTask.AzureStorage.Tests/TimeoutHandlerTests.cs b/Test/DurableTask.AzureStorage.Tests/TimeoutHandlerTests.cs new file mode 100644 index 000000000..d0d2b06e1 --- /dev/null +++ b/Test/DurableTask.AzureStorage.Tests/TimeoutHandlerTests.cs @@ -0,0 +1,138 @@ +using System; +using System.Collections.Generic; +using System.Reflection; +using System.Text; +using System.Threading.Tasks; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +namespace DurableTask.AzureStorage.Tests +{ + /// + /// Tests for . + /// + [TestClass] + public class TimeoutHandlerTests + { + /// + /// Ensures that process graceful action is executed before process is killed. + /// + /// Task tracking operation. + [TestMethod] + public async Task EnsureTimeoutHandlerRunsProcessShutdownEventsBeforeProcessKill() + { + int executionCount = 0; + int killCount = 0; + int shutdownCount = 0; + + Action killAction = (errorString) => killCount++; + typeof(TimeoutHandler) + .GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static) + .SetValue(null, killAction); + + // TimeoutHandler at the moment invokes shutdown on 5th call failure. + await TimeoutHandler.ExecuteWithTimeout( + "test", + "account", + new AzureStorageOrchestrationServiceSettings + { + OnImminentFailFast = (errorString) => + { + shutdownCount++; + return Task.FromResult(true); + } + }, + async (operationContext, cancellationToken) => + { + executionCount++; + await Task.Delay(TimeSpan.FromMinutes(3)); + return 1; + }); + + Assert.AreEqual(5, executionCount); + Assert.AreEqual(1, shutdownCount); + Assert.AreEqual(1, killCount); + } + + /// + /// Ensures that process graceful action is executed and failfast is skipped. + /// + /// Task tracking operation. + [TestMethod] + [ExpectedException(typeof(TimeoutException))] + public async Task EnsureTimeoutHandlerRunsProcessShutdownEventsAndSkipsProcessKill() + { + int executionCount = 0; + int killCount = 0; + int shutdownCount = 0; + + Action killAction = (errorString) => killCount++; + typeof(TimeoutHandler) + .GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static) + .SetValue(null, killAction); + + // TimeoutHandler at the moment invokes shutdown on 5th call failure. + await TimeoutHandler.ExecuteWithTimeout( + "test", + "account", + new AzureStorageOrchestrationServiceSettings + { + OnImminentFailFast = (errorString) => + { + shutdownCount++; + return Task.FromResult(false); + } + }, + async (operationContext, cancellationToken) => + { + executionCount++; + await Task.Delay(TimeSpan.FromMinutes(3)); + return 1; + }); + + Assert.AreEqual(5, executionCount); + Assert.AreEqual(1, shutdownCount); + Assert.AreEqual(0, killCount); + } + + /// + /// Ensures that process graceful action is executed before process is killed. + /// + /// Task tracking operation. + [TestMethod] + public async Task EnsureTimeoutHandlerExecutesProcessKillIfGracefulShutdownFails() + { + int executionCount = 0; + int killCount = 0; + int shutdownCount = 0; + + Action killAction = (errorString) => killCount++; + typeof(TimeoutHandler) + .GetField("ProcessKillAction", BindingFlags.NonPublic | BindingFlags.Static) + .SetValue(null, killAction); + + // TimeoutHandler at the moment invokes shutdown on 5th call failure. + await TimeoutHandler.ExecuteWithTimeout( + "test", + "account", + new AzureStorageOrchestrationServiceSettings + { + OnImminentFailFast = (errorString) => + { + shutdownCount++; + + throw new Exception("Breaking graceful shutdown"); + } + }, + async (operationContext, cancellationToken) => + { + executionCount++; + await Task.Delay(TimeSpan.FromMinutes(3)); + return 1; + }); + + Assert.AreEqual(5, executionCount); + Assert.AreEqual(1, shutdownCount); + Assert.AreEqual(1, killCount); + } + } +} diff --git a/src/DurableTask.AzureStorage/AzureStorageOrchestrationServiceSettings.cs b/src/DurableTask.AzureStorage/AzureStorageOrchestrationServiceSettings.cs index d2a43cfd3..c3fd2c6c8 100644 --- a/src/DurableTask.AzureStorage/AzureStorageOrchestrationServiceSettings.cs +++ b/src/DurableTask.AzureStorage/AzureStorageOrchestrationServiceSettings.cs @@ -21,6 +21,7 @@ namespace DurableTask.AzureStorage using Microsoft.WindowsAzure.Storage.Queue; using Microsoft.WindowsAzure.Storage.Table; using System.Runtime.Serialization; + using System.Threading.Tasks; /// /// Settings that impact the runtime behavior of the . @@ -214,6 +215,17 @@ public class AzureStorageOrchestrationServiceSettings /// public ILoggerFactory LoggerFactory { get; set; } = NoOpLoggerFactory.Instance; + /// + /// Gets or sets an optional function to be executed before the app is recycled. Reason for shutdown is passed as a string parameter. + /// This can be used to perform any pending cleanup tasks or just do a graceful shutdown. + /// The function returns a . If 'true' is returned is executed, if 'false' is returned, + /// process kill is skipped. + /// A wait time of 35 seconds will be given for the task to finish, if the task does not finish in required time, will be executed. + /// + /// Skipping process kill by returning false might have negative consequences if since Storage SDK might be in deadlock. Ensure if you return + /// false a process shutdown is executed by you. + public Func> OnImminentFailFast { get; set; } = (message) => Task.FromResult(true); + /// /// Returns bool indicating is the TrackingStoreStorageAccount has been set. /// diff --git a/src/DurableTask.AzureStorage/TimeoutHandler.cs b/src/DurableTask.AzureStorage/TimeoutHandler.cs index 9348510a8..b0ca864b8 100644 --- a/src/DurableTask.AzureStorage/TimeoutHandler.cs +++ b/src/DurableTask.AzureStorage/TimeoutHandler.cs @@ -31,6 +31,11 @@ internal static class TimeoutHandler private static int NumTimeoutsHit = 0; + /// + /// Process kill action. This is exposed here to allow override from tests. + /// + private static Action ProcessKillAction = (errorMessage) => Environment.FailFast(errorMessage); + public static async Task ExecuteWithTimeout( string operationName, string account, @@ -72,10 +77,37 @@ public static async Task ExecuteWithTimeout( // Delay to ensure the ETW event gets written await Task.Delay(TimeSpan.FromSeconds(3)); - Environment.FailFast(message); - // Should never be hit, due to above FailFast() call. - return default(T); + bool executeFailFast = true; + Task gracefulShutdownTask = Task.Run(async () => + { + try + { + return await settings.OnImminentFailFast(message); + } + catch (Exception) + { + return true; + } + }); + + await Task.WhenAny(gracefulShutdownTask, Task.Delay(TimeSpan.FromSeconds(35))); + + if (gracefulShutdownTask.IsCompleted) + { + executeFailFast = gracefulShutdownTask.Result; + } + + if (executeFailFast) + { + TimeoutHandler.ProcessKillAction(message); + } + else + { + // Technically we don't need else as the action above would have killed the process. + // However tests don't kill the process so putting in else. + throw new TimeoutException(message); + } } }