From 12842e7664049be05fb90c121dee09bfd466928b Mon Sep 17 00:00:00 2001 From: David Justo Date: Thu, 1 Aug 2024 09:48:28 -0700 Subject: [PATCH 01/17] run official build nightly (#2887) --- eng/ci/official-build.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/eng/ci/official-build.yml b/eng/ci/official-build.yml index 91fb7ace7..d0839ba79 100644 --- a/eng/ci/official-build.yml +++ b/eng/ci/official-build.yml @@ -10,6 +10,17 @@ trigger: # CI only, does not trigger on PRs. pr: none +schedules: +# Build nightly to catch any new CVEs and report SDL often. +# We are also required to generate CodeQL reports weekly, so this +# helps us meet that. +- cron: "0 0 * * *" + displayName: Nightly Build + branches: + include: + - main + always: true + resources: repositories: - repository: 1es From c7260c0a8b35bd35752615cae47c17db44f24798 Mon Sep 17 00:00:00 2001 From: David Justo Date: Thu, 8 Aug 2024 10:19:22 -0700 Subject: [PATCH 02/17] Rev version and DTFx.dep (#2889) * rev version and DTFx.dep * mssql-tools to mssql-tools18 --- .../WebJobs.Extensions.DurableTask.csproj | 4 ++-- test/SmokeTests/e2e-test.ps1 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index 0b17fee29..e023c4d4c 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -6,7 +6,7 @@ Microsoft.Azure.WebJobs.Extensions.DurableTask 2 13 - 4 + 5 $(PackageSuffix) $(MajorVersion).$(MinorVersion).$(PatchVersion) $(MajorVersion).0.0.0 @@ -114,7 +114,7 @@ - + diff --git a/test/SmokeTests/e2e-test.ps1 b/test/SmokeTests/e2e-test.ps1 index 845c35eb2..e7a7aa8c1 100644 --- a/test/SmokeTests/e2e-test.ps1 +++ b/test/SmokeTests/e2e-test.ps1 @@ -65,7 +65,7 @@ if ($NoSetup -eq $false) { # Create the database with strict binary
collation Write-Host "Creating '$dbname' database with '$collation' collation" -ForegroundColor DarkYellow - docker exec -d mssql-server /opt/mssql-tools/bin/sqlcmd -S . -U sa -P "$pw" -Q "CREATE DATABASE [$dbname] COLLATE $collation" + docker exec -d mssql-server /opt/mssql-tools18/bin/sqlcmd -S . -U sa -P "$pw" -Q "CREATE DATABASE [$dbname] COLLATE $collation" Exit-OnError # Wait for database to be ready From 84e02f4cc86ebc197515519ee5ff145dbdffb529 Mon Sep 17 00:00:00 2001 From: Naiyuan Tian <110135109+nytian@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:17:43 -0700 Subject: [PATCH 03/17] Rev Durable Functions Package at External Client Samples (#2894) * Update ToDoList.csproj * Update DurableClientSampleFunctionApp.csproj --- .../aspnetcore-app/ToDoList.csproj | 2 +- .../functions-app/DurableClientSampleFunctionApp.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/durable-client-managed-identity/aspnetcore-app/ToDoList.csproj b/samples/durable-client-managed-identity/aspnetcore-app/ToDoList.csproj index fdd756ee8..d37d305c5 100644 --- a/samples/durable-client-managed-identity/aspnetcore-app/ToDoList.csproj +++ b/samples/durable-client-managed-identity/aspnetcore-app/ToDoList.csproj @@ -6,7 +6,7 @@ - + runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/samples/durable-client-managed-identity/functions-app/DurableClientSampleFunctionApp.csproj b/samples/durable-client-managed-identity/functions-app/DurableClientSampleFunctionApp.csproj index cb7299f19..34b19725d 100644 --- a/samples/durable-client-managed-identity/functions-app/DurableClientSampleFunctionApp.csproj +++ b/samples/durable-client-managed-identity/functions-app/DurableClientSampleFunctionApp.csproj @@ -5,7 +5,7 @@ - + From 6cb5cc5814102685094e64a55ad1f885c2bfb14d Mon Sep 17 00:00:00 2001 From: David Justo Date: Mon, 12 Aug 2024 14:19:02 -0700 Subject: [PATCH 04/17] Add codeql GH action, change pack directory in official pipeline 
(#2896) --- .github/workflows/codeQL.yml | 79 ++++++++++++++++++++++++++++++++++++ eng/templates/build.yml | 2 +- 2 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/codeQL.yml diff --git a/.github/workflows/codeQL.yml b/.github/workflows/codeQL.yml new file mode 100644 index 000000000..6cb68940e --- /dev/null +++ b/.github/workflows/codeQL.yml @@ -0,0 +1,79 @@ +# This workflow generates weekly CodeQL reports for this repo, a security requirement. +# The workflow is adapted from the following reference: https://github.com/Azure-Samples/azure-functions-python-stream-openai/pull/2/files +# Generic comments on how to modify this file are left intact for future maintenance. + +name: "CodeQL" + +on: + push: + branches: [ "main", "*" ] # TODO: remove development branch after approval + pull_request: + branches: [ "main", "*"] # TODO: remove development branch after approval + schedule: + - cron: '0 0 * * 1' # Weekly Monday run, needed for weekly reports + workflow_call: # allows to be invoked as part of a larger workflow + workflow_dispatch: # allows for the workflow to run manually see: https://docs.github.com/en/actions/using-workflows/manually-running-a-workflow + +env: + solution: WebJobs.Extensions.DurableTask.sln + config: Release + +jobs: + + analyze: + name: Analyze + runs-on: windows-latest + permissions: + actions: read + contents: read + security-events: write + + + strategy: + fail-fast: false + matrix: + language: ['csharp'] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file.
+ # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + - uses: actions/checkout@v3 + with: + submodules: true + + - name: Setup .NET + uses: actions/setup-dotnet@v3 + + - name: Set up .NET Core 2.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '2.1.x' + + - name: Set up .NET Core 3.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '3.1.x' + + - name: Restore dependencies + run: dotnet restore $solution + + - name: Build + run: dotnet build $solution #--configuration $config #--no-restore -p:FileVersionRevision=$GITHUB_RUN_NUMBER -p:ContinuousIntegrationBuild=true + + # Run CodeQL analysis + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" \ No newline at end of file diff --git a/eng/templates/build.yml b/eng/templates/build.yml index 7bd8ee166..5b5a7bb0c 100644 --- a/eng/templates/build.yml +++ b/eng/templates/build.yml @@ -63,7 +63,7 @@ jobs: command: pack packagesToPack: 'src/**/WebJobs.Extensions.DurableTask.csproj' configuration: Release - packDirectory: 'azure-functions-durable-extension' + packDirectory: $(build.artifactStagingDirectory) nobuild: true # Remove redundant symbol package(s) From 8daed98e5a8fad61ceed90ad2b2da0e06c1b4e7f Mon Sep 17 00:00:00 2001 From: Naiyuan Tian <110135109+nytian@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:09:36 -0700 Subject: [PATCH 05/17] Increase Worker Extension to v1.1.5 (#2897) * Update AssemblyInfo.cs * Update Worker.Extensions.DurableTask.csproj --- src/Worker.Extensions.DurableTask/AssemblyInfo.cs | 4 ++-- 
.../Worker.Extensions.DurableTask.csproj | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs index 7f387ee55..e7b781cf0 100644 --- a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs +++ b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs @@ -5,5 +5,5 @@ using Microsoft.Azure.Functions.Worker.Extensions.Abstractions; // TODO: Find a way to generate this dynamically at build-time -[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.4")] -[assembly: InternalsVisibleTo("Worker.Extensions.DurableTask.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100cd1dabd5a893b40e75dc901fe7293db4a3caf9cd4d3e3ed6178d49cd476969abe74a9e0b7f4a0bb15edca48758155d35a4f05e6e852fff1b319d103b39ba04acbadd278c2753627c95e1f6f6582425374b92f51cca3deb0d2aab9de3ecda7753900a31f70a236f163006beefffe282888f85e3c76d1205ec7dfef7fa472a17b1")] \ No newline at end of file +[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.5")] +[assembly: InternalsVisibleTo("Worker.Extensions.DurableTask.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100cd1dabd5a893b40e75dc901fe7293db4a3caf9cd4d3e3ed6178d49cd476969abe74a9e0b7f4a0bb15edca48758155d35a4f05e6e852fff1b319d103b39ba04acbadd278c2753627c95e1f6f6582425374b92f51cca3deb0d2aab9de3ecda7753900a31f70a236f163006beefffe282888f85e3c76d1205ec7dfef7fa472a17b1")] diff --git a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj index b5c22a516..e6c954cb3 100644 --- a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj +++ b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj @@ -29,7 +29,7 @@ ..\..\sign.snk - 1.1.4 + 1.1.5 $(VersionPrefix).0 From d01791972a4b9f439b3e83f8a0e2f3e80a5b1914 Mon Sep 17 
00:00:00 2001 From: David Justo Date: Tue, 13 Aug 2024 15:14:04 -0700 Subject: [PATCH 06/17] add net worker steps to 1ES official build (#2898) --- eng/templates/build.yml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/eng/templates/build.yml b/eng/templates/build.yml index 5b5a7bb0c..d61357f0b 100644 --- a/eng/templates/build.yml +++ b/eng/templates/build.yml @@ -54,6 +54,13 @@ jobs: pattern: '*DurableTask.dll' signType: dll + - template: ci/sign-files.yml@eng + parameters: + displayName: Sign assemblies + folderPath: 'src/Worker.Extensions.DurableTask/bin/Release' + pattern: '*DurableTask.dll' + signType: dll + # dotnet pack # Packaging needs to be a separate step from build. # This will automatically pick up the signed DLLs. @@ -66,6 +73,19 @@ jobs: packDirectory: $(build.artifactStagingDirectory) nobuild: true + + # dotnet pack + # Packaging needs to be a separate step from build. + # This will automatically pick up the signed DLLs. + - task: DotNetCoreCLI@2 + displayName: 'dotnet pack Worker.Extensions.DurableTask.csproj' + inputs: + command: pack + packagesToPack: 'src/**/Worker.Extensions.DurableTask.csproj' + configuration: Release + packDirectory: $(build.artifactStagingDirectory) + nobuild: true + # Remove redundant symbol package(s) - script: | echo *** Searching for .symbols.nupkg files to delete... 
From 5fe3e233823a8dfb8c55617adb3e02b5a2923f10 Mon Sep 17 00:00:00 2001 From: David Justo Date: Wed, 4 Sep 2024 10:58:18 -0700 Subject: [PATCH 07/17] remove dead code, add new ps repo (#2907) --- tools/triageHelper/function_app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/triageHelper/function_app.py b/tools/triageHelper/function_app.py index 12a6d77ff..c2f4d8aa1 100644 --- a/tools/triageHelper/function_app.py +++ b/tools/triageHelper/function_app.py @@ -12,6 +12,7 @@ "Azure/azure-functions-durable-extension", "Azure/azure-functions-durable-js", "Azure/azure-functions-durable-python", + "Azure/azure-functions-durable-powershell", powershell_worker_repo, "microsoft/durabletask-java", "microsoft/durabletask-dotnet", @@ -40,7 +41,6 @@ def get_triage_issues(repository): 'labels': label, } - payload_str = urllib.parse.urlencode(payload, safe=':+') # Define the GitHub API endpoint api_endpoint = f"https://api.github.com/repos/{repository}/issues" query_str1 = "?labels=Needs%3A%20Triage%20%3Amag%3A" From 83bf289fdc7ba96e6dab70408b5eb8ac5d1c74ce Mon Sep 17 00:00:00 2001 From: David Justo Date: Fri, 6 Sep 2024 11:08:11 -0700 Subject: [PATCH 08/17] add dev to list of branches to run on (#2909) --- eng/ci/official-build.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/eng/ci/official-build.yml b/eng/ci/official-build.yml index d0839ba79..e7a871026 100644 --- a/eng/ci/official-build.yml +++ b/eng/ci/official-build.yml @@ -6,6 +6,7 @@ trigger: branches: include: - main + - dev # CI only, does not trigger on PRs. 
pr: none @@ -19,6 +20,7 @@ schedules: branches: include: - main + - dev always: true resources: From d498cbbe1a5fd3fd81f21a1e9244f38a52f0fbf5 Mon Sep 17 00:00:00 2001 From: David Justo Date: Mon, 16 Sep 2024 10:36:22 -0700 Subject: [PATCH 09/17] Add source link reconstruction (#2913) --- Directory.Build.targets | 37 ++++++++++++++++++++++++++++++ WebJobs.Extensions.DurableTask.sln | 3 ++- 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 Directory.Build.targets diff --git a/Directory.Build.targets b/Directory.Build.targets new file mode 100644 index 000000000..47c2b86a2 --- /dev/null +++ b/Directory.Build.targets @@ -0,0 +1,37 @@ + + + + + + + + + false + <_TranslateUrlPattern>(https://azfunc%40dev\.azure\.com/azfunc/internal/_git|https://dev\.azure\.com/azfunc/internal/_git|https://azfunc\.visualstudio\.com/internal/_git|azfunc%40vs-ssh\.visualstudio\.com:v3/azfunc/internal|git%40ssh\.dev\.azure\.com:v3/azfunc/internal)/([^/\.]+)\.(.+) + <_TranslateUrlReplacement>https://github.com/$2/$3 + + + + + + $([System.Text.RegularExpressions.Regex]::Replace($(ScmRepositoryUrl), $(_TranslateUrlPattern), $(_TranslateUrlReplacement))) + + + + $([System.Text.RegularExpressions.Regex]::Replace(%(SourceRoot.ScmRepositoryUrl), $(_TranslateUrlPattern), $(_TranslateUrlReplacement))) + + + + + \ No newline at end of file diff --git a/WebJobs.Extensions.DurableTask.sln b/WebJobs.Extensions.DurableTask.sln index 353e83805..b710584c2 100644 --- a/WebJobs.Extensions.DurableTask.sln +++ b/WebJobs.Extensions.DurableTask.sln @@ -18,6 +18,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution .editorconfig = .editorconfig azure-pipelines-release-dotnet-isolated.yml = azure-pipelines-release-dotnet-isolated.yml azure-pipelines-release.yml = azure-pipelines-release.yml + Directory.Build.targets = Directory.Build.targets nuget.config = nuget.config README.md = README.md release_notes.md = release_notes.md @@ -94,7 +95,7 @@ 
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "PerfTests", "PerfTests", "{ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DFPerfScenariosV4", "test\DFPerfScenarios\DFPerfScenariosV4.csproj", "{FC8AD123-F949-4D21-B817-E5A4BBF7F69B}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Worker.Extensions.DurableTask.Tests", "test\Worker.Extensions.DurableTask.Tests\Worker.Extensions.DurableTask.Tests.csproj", "{76DEC17C-BF6A-498A-8E8A-7D6CB2E03284}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Worker.Extensions.DurableTask.Tests", "test\Worker.Extensions.DurableTask.Tests\Worker.Extensions.DurableTask.Tests.csproj", "{76DEC17C-BF6A-498A-8E8A-7D6CB2E03284}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution From 11df7322602eac012682ec3a585e0001a9886a9f Mon Sep 17 00:00:00 2001 From: David Justo Date: Mon, 16 Sep 2024 13:28:39 -0700 Subject: [PATCH 10/17] Rev WebJobs Extension to 2.13.6, Rev DTFx.AS dependency to 1.17.5, make 1ES build deterministic, increase CI Azurite version (#2914) --- eng/templates/build.yml | 1 + .../AzureStorageDurabilityProviderFactory.cs | 1 + .../Options/AzureStorageOptions.cs | 14 ++++++++++++++ .../WebJobs.Extensions.DurableTask.csproj | 4 ++-- test/SmokeTests/e2e-test.ps1 | 2 +- 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/eng/templates/build.yml b/eng/templates/build.yml index d61357f0b..3e3e41040 100644 --- a/eng/templates/build.yml +++ b/eng/templates/build.yml @@ -46,6 +46,7 @@ jobs: solution: '**/WebJobs.Extensions.DurableTask.sln' vsVersion: "16.0" configuration: Release + msbuildArgs: /p:FileVersionRevision=$(Build.BuildId) /p:ContinuousIntegrationBuild=true # these flags make package build deterministic - template: ci/sign-files.yml@eng parameters: diff --git a/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs b/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs index 0162d26b4..242bef777 100644 
--- a/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs +++ b/src/WebJobs.Extensions.DurableTask/AzureStorageDurabilityProviderFactory.cs @@ -217,6 +217,7 @@ internal AzureStorageOrchestrationServiceSettings GetAzureStorageOrchestrationSe UseSeparateQueueForEntityWorkItems = this.useSeparateQueueForEntityWorkItems, EntityMessageReorderWindowInMinutes = this.options.EntityMessageReorderWindowInMinutes, MaxEntityOperationBatchSize = this.options.MaxEntityOperationBatchSize, + AllowReplayingTerminalInstances = this.azureStorageOptions.AllowReplayingTerminalInstances, }; if (this.inConsumption) diff --git a/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs b/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs index 1667aabaf..4a6a506cb 100644 --- a/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs +++ b/src/WebJobs.Extensions.DurableTask/Options/AzureStorageOptions.cs @@ -179,6 +179,20 @@ public string TrackingStoreConnectionStringName /// A boolean indicating whether to use the table partition strategy. Defaults to false. public bool UseTablePartitionManagement { get; set; } = false; + /// + /// When false, when an orchestrator is in a terminal state (e.g. Completed, Failed, Terminated), events for that orchestrator are discarded. + /// Otherwise, events for a terminal orchestrator induce a replay. This may be used to recompute the state of the orchestrator in the "Instances Table". + /// + /// + /// Transactions across Azure Tables are not possible, so we independently update the "History table" and then the "Instances table" + /// to set the state of the orchestrator. + /// If a crash were to occur between these two updates, the state of the orchestrator in the "Instances table" would be incorrect. 
+ /// By setting this configuration to true, you can recover from these inconsistencies by forcing a replay of the orchestrator in response + /// to a client event like a termination request or an external event, which gives the framework another opportunity to update the state of + /// the orchestrator in the "Instances table". To force a replay after enabling this configuration, just send any external event to the affected instanceId. + /// + public bool AllowReplayingTerminalInstances { get; set; } = false; + /// /// Throws an exception if the provided hub name violates any naming conventions for the storage provider. /// diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index e023c4d4c..c6bae32df 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -6,7 +6,7 @@ Microsoft.Azure.WebJobs.Extensions.DurableTask 2 13 - 5 + 6 $(PackageSuffix) $(MajorVersion).$(MinorVersion).$(PatchVersion) $(MajorVersion).0.0.0 @@ -114,7 +114,7 @@ - + diff --git a/test/SmokeTests/e2e-test.ps1 b/test/SmokeTests/e2e-test.ps1 index e7a7aa8c1..5eee1e0fd 100644 --- a/test/SmokeTests/e2e-test.ps1 +++ b/test/SmokeTests/e2e-test.ps1 @@ -26,7 +26,7 @@ function Exit-OnError() { } $ErrorActionPreference = "Stop" -$AzuriteVersion = "3.26.0" +$AzuriteVersion = "3.32.0" if ($NoSetup -eq $false) { # Build the docker image first, since that's the most critical step From 87d38a89884070b526e734c3e18a1564163e4962 Mon Sep 17 00:00:00 2001 From: David Justo Date: Wed, 25 Sep 2024 09:10:18 -0700 Subject: [PATCH 11/17] rev worker extension version to 1.1.6, update dependencies and release notes (#2917) --- release_notes.md | 8 ++++++-- src/Worker.Extensions.DurableTask/AssemblyInfo.cs | 2 +- .../Worker.Extensions.DurableTask.csproj | 6 +++--- 3 files changed, 10 insertions(+), 6 
deletions(-) diff --git a/release_notes.md b/release_notes.md index ad44bb9b3..9b2804fe4 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,10 +1,10 @@ # Release Notes -## Microsoft.Azure.Functions.Worker.Extensions.DurableTask 1.2.1 +## Microsoft.Azure.Functions.Worker.Extensions.DurableTask 1.1.6 ### New Features -- Fix regression on `TerminateInstanceAsync` API causing invocations to fail with "unimplemented" exceptions (https://github.com/Azure/azure-functions-durable-extension/pull/2829). +- Support for new `AllowReplayingTerminalInstances` setting in Azure Storage backend (https://github.com/Azure/durabletask/pull/1159), settable via `host.json` ### Bug Fixes @@ -12,6 +12,10 @@ ### Dependency Updates +- Microsoft.DurableTask.Client.Grpc to 1.3.0 +- Microsoft.DurableTask.Worker.Grpc to 1.3.0 +- Microsoft.Azure.WebJobs.Extensions.DurableTask (in host process) to 2.13.6 + ## Microsoft.Azure.WebJobs.Extensions.DurableTask ### New Features diff --git a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs index e7b781cf0..63fc22df6 100644 --- a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs +++ b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs @@ -5,5 +5,5 @@ using Microsoft.Azure.Functions.Worker.Extensions.Abstractions; // TODO: Find a way to generate this dynamically at build-time -[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.5")] +[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.6")] [assembly: InternalsVisibleTo("Worker.Extensions.DurableTask.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100cd1dabd5a893b40e75dc901fe7293db4a3caf9cd4d3e3ed6178d49cd476969abe74a9e0b7f4a0bb15edca48758155d35a4f05e6e852fff1b319d103b39ba04acbadd278c2753627c95e1f6f6582425374b92f51cca3deb0d2aab9de3ecda7753900a31f70a236f163006beefffe282888f85e3c76d1205ec7dfef7fa472a17b1")] diff --git 
a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj index e6c954cb3..b310e80da 100644 --- a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj +++ b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj @@ -29,7 +29,7 @@ ..\..\sign.snk - 1.1.5 + 1.1.6 $(VersionPrefix).0 @@ -39,8 +39,8 @@ - - + + From 116394414c23855cec5fdd5ef6d22bbb9c8371cc Mon Sep 17 00:00:00 2001 From: Chris Gillum Date: Tue, 1 Oct 2024 06:41:48 -0700 Subject: [PATCH 12/17] Update Microsoft.DurableTask.Grpc to latest version (#2919) --- release_notes.md | 12 ++++-------- ...rosoft.Azure.WebJobs.Extensions.DurableTask.xml | 14 ++++++++++++++ .../ProtobufUtils.cs | 6 +++++- .../WebJobs.Extensions.DurableTask.csproj | 4 ++-- src/Worker.Extensions.DurableTask/AssemblyInfo.cs | 2 +- .../Worker.Extensions.DurableTask.csproj | 2 +- 6 files changed, 27 insertions(+), 13 deletions(-) diff --git a/release_notes.md b/release_notes.md index 9b2804fe4..4d172b565 100644 --- a/release_notes.md +++ b/release_notes.md @@ -1,22 +1,16 @@ # Release Notes -## Microsoft.Azure.Functions.Worker.Extensions.DurableTask 1.1.6 +## Microsoft.Azure.Functions.Worker.Extensions.DurableTask (version) ### New Features -- Support for new `AllowReplayingTerminalInstances` setting in Azure Storage backend (https://github.com/Azure/durabletask/pull/1159), settable via `host.json` - ### Bug Fixes ### Breaking Changes ### Dependency Updates -- Microsoft.DurableTask.Client.Grpc to 1.3.0 -- Microsoft.DurableTask.Worker.Grpc to 1.3.0 -- Microsoft.Azure.WebJobs.Extensions.DurableTask (in host process) to 2.13.6 - -## Microsoft.Azure.WebJobs.Extensions.DurableTask +## Microsoft.Azure.WebJobs.Extensions.DurableTask 2.13.7 ### New Features @@ -25,3 +19,5 @@ ### Breaking Changes ### Dependency Updates + +- Microsoft.DurableTask.Grpc to 1.3.0 diff --git 
a/src/WebJobs.Extensions.DurableTask/Microsoft.Azure.WebJobs.Extensions.DurableTask.xml b/src/WebJobs.Extensions.DurableTask/Microsoft.Azure.WebJobs.Extensions.DurableTask.xml index 0088042e9..8faf3dfff 100644 --- a/src/WebJobs.Extensions.DurableTask/Microsoft.Azure.WebJobs.Extensions.DurableTask.xml +++ b/src/WebJobs.Extensions.DurableTask/Microsoft.Azure.WebJobs.Extensions.DurableTask.xml @@ -4208,6 +4208,20 @@ A boolean indicating whether to use the table partition strategy. Defaults to false. + + + When false, when an orchestrator is in a terminal state (e.g. Completed, Failed, Terminated), events for that orchestrator are discarded. + Otherwise, events for a terminal orchestrator induce a replay. This may be used to recompute the state of the orchestrator in the "Instances Table". + + + Transactions across Azure Tables are not possible, so we independently update the "History table" and then the "Instances table" + to set the state of the orchestrator. + If a crash were to occur between these two updates, the state of the orchestrator in the "Instances table" would be incorrect. + By setting this configuration to true, you can recover from these inconsistencies by forcing a replay of the orchestrator in response + to a client event like a termination request or an external event, which gives the framework another opportunity to update the state of + the orchestrator in the "Instances table". To force a replay after enabling this configuration, just send any external event to the affected instanceId. + + Throws an exception if the provided hub name violates any naming conventions for the storage provider. 
diff --git a/src/WebJobs.Extensions.DurableTask/ProtobufUtils.cs b/src/WebJobs.Extensions.DurableTask/ProtobufUtils.cs index 57b84012b..d55bce846 100644 --- a/src/WebJobs.Extensions.DurableTask/ProtobufUtils.cs +++ b/src/WebJobs.Extensions.DurableTask/ProtobufUtils.cs @@ -97,7 +97,11 @@ public static P.HistoryEvent ToHistoryEventProto(HistoryEvent e) }, }, ScheduledStartTimestamp = startedEvent.ScheduledStartTime == null ? null : Timestamp.FromDateTime(startedEvent.ScheduledStartTime.Value), - CorrelationData = startedEvent.Correlation, + ParentTraceContext = startedEvent.ParentTraceContext == null ? null : new P.TraceContext + { + TraceParent = startedEvent.ParentTraceContext.TraceParent, + TraceState = startedEvent.ParentTraceContext.TraceState, + }, }; break; case EventType.ExecutionTerminated: diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index c6bae32df..05b1a7899 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -6,7 +6,7 @@ Microsoft.Azure.WebJobs.Extensions.DurableTask 2 13 - 6 + 7 $(PackageSuffix) $(MajorVersion).$(MinorVersion).$(PatchVersion) $(MajorVersion).0.0.0 @@ -107,7 +107,7 @@ - + diff --git a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs index 63fc22df6..0be4e12df 100644 --- a/src/Worker.Extensions.DurableTask/AssemblyInfo.cs +++ b/src/Worker.Extensions.DurableTask/AssemblyInfo.cs @@ -5,5 +5,5 @@ using Microsoft.Azure.Functions.Worker.Extensions.Abstractions; // TODO: Find a way to generate this dynamically at build-time -[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.6")] +[assembly: ExtensionInformation("Microsoft.Azure.WebJobs.Extensions.DurableTask", "2.13.7")] [assembly: 
InternalsVisibleTo("Worker.Extensions.DurableTask.Tests, PublicKey=0024000004800000940000000602000000240000525341310004000001000100cd1dabd5a893b40e75dc901fe7293db4a3caf9cd4d3e3ed6178d49cd476969abe74a9e0b7f4a0bb15edca48758155d35a4f05e6e852fff1b319d103b39ba04acbadd278c2753627c95e1f6f6582425374b92f51cca3deb0d2aab9de3ecda7753900a31f70a236f163006beefffe282888f85e3c76d1205ec7dfef7fa472a17b1")] diff --git a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj index b310e80da..d4b0c48d6 100644 --- a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj +++ b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj @@ -29,7 +29,7 @@ ..\..\sign.snk - 1.1.6 + 1.1.7 $(VersionPrefix).0 From 8a5db71b7a1a67edbc72e55f339b4cc6c090ffcb Mon Sep 17 00:00:00 2001 From: David Justo Date: Thu, 3 Oct 2024 14:32:45 -0700 Subject: [PATCH 13/17] Retry platform-level errors in the isolated process for .NET isolated (#2922) Co-authored-by: Andy Staples --- .../smoketest-dotnet-isolated-v4.yml | 78 ++++++++- .../RemoteOrchestratorContext.cs | 33 ++++ .../OutOfProcMiddleware.cs | 23 ++- .../DotNetIsolated/DotNetIsolated.sln | 25 +++ .../DotNetIsolated/FaultyOrchestrators.cs | 165 ++++++++++++++++++ .../OOProcSmokeTests/DotNetIsolated/host.json | 11 +- .../DotNetIsolated/run-smoke-tests.ps1 | 119 +++++++++++++ 7 files changed, 447 insertions(+), 7 deletions(-) create mode 100644 test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln create mode 100644 test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs create mode 100644 test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 diff --git a/.github/workflows/smoketest-dotnet-isolated-v4.yml b/.github/workflows/smoketest-dotnet-isolated-v4.yml index f818ff7ae..474f48448 100644 --- a/.github/workflows/smoketest-dotnet-isolated-v4.yml +++ 
b/.github/workflows/smoketest-dotnet-isolated-v4.yml @@ -19,7 +19,79 @@ jobs: steps: - uses: actions/checkout@v2 - # Validation is blocked on https://github.com/Azure/azure-functions-host/issues/7995 - - name: Run V4 .NET Isolated Smoke Test - run: test/SmokeTests/e2e-test.ps1 -DockerfilePath test/SmokeTests/OOProcSmokeTests/DotNetIsolated/Dockerfile -HttpStartPath api/StartHelloCitiesTyped -NoValidation + # Install .NET versions + - name: Set up .NET Core 3.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '3.1.x' + + - name: Set up .NET Core 2.1 + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '2.1.x' + + - name: Set up .NET Core 6.x + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '6.x' + + - name: Set up .NET Core 8.x + uses: actions/setup-dotnet@v3 + with: + dotnet-version: '8.x' + + # Install Azurite + - name: Set up Node.js (needed for Azurite) + uses: actions/setup-node@v3 + with: + node-version: '18.x' # Azurite requires at least Node 18 + + - name: Install Azurite + run: npm install -g azurite + + - name: Restore WebJobs extension + run: dotnet restore $solution + + - name: Build and pack WebJobs extension + run: cd ./src/WebJobs.Extensions.DurableTask && + mkdir ./out && + dotnet build -c Release WebJobs.Extensions.DurableTask.csproj --output ./out && + mkdir ~/packages && + dotnet nuget push ./out/Microsoft.Azure.WebJobs.Extensions.DurableTask.*.nupkg --source ~/packages && + dotnet nuget add source ~/packages + + - name: Build .NET Isolated Smoke Test + run: cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && + dotnet restore --verbosity normal && + dotnet build -c Release + + - name: Install core tools + run: npm i -g azure-functions-core-tools@4 --unsafe-perm true + + # Run smoke tests + # Unlike other smoke tests, the .NET isolated smoke tests run outside of a docker container, due to race conditions + # when building the smoke test app in docker, causing the build to fail.
This is a temporary workaround until the + # root cause is identified and fixed. + + - name: Run smoke tests (Hello Cities) + shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/StartHelloCitiesTyped + + - name: Run smoke tests (Process Exit) + shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartProcessExitOrchestrator + + - name: Run smoke tests (Timeout) + shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartTimeoutOrchestrator + + - name: Run smoke tests (OOM) shell: pwsh + run: azurite --silent --blobPort 10000 --queuePort 10001 --tablePort 10002 & + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 -HttpStartPath api/durable_HttpStartOOMOrchestrator \ No newline at end of file diff --git a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs index 9c4d6a02e..86cb21c84 100644 --- a/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs +++ b/src/WebJobs.Extensions.DurableTask/ContextImplementations/RemoteOrchestratorContext.cs @@ -120,6 +120,31 @@ internal void SetResult(string orchestratorResponseJsonText) this.SetResultInternal(result); } + private void ThrowIfPlatformLevelException(FailureDetails failureDetails) 
+ { + // Recursively inspect the FailureDetails of the failed orchestrator and throw if a platform-level exception is detected. + // + // Today, this method only checks for `OutOfMemoryException`. In the future, we may want to add more cases. + // Other known platform-level exceptions, like timeouts or process exits due to `Environment.FailFast`, do not yield + // an `OrchestratorExecutionResult` as the isolated invocation is abruptly terminated. Therefore, they don't need to be + // handled in this method. + // However, our tests reveal that OOMs are, surprisingly, caught and returned as an `OrchestratorExecutionResult` + // by the isolated process, and thus need special handling. + // + // It's unclear if all OOMs are caught by the isolated process (probably not), and also if there are other platform-level + // errors that are also caught in the isolated process and returned as an `OrchestratorExecutionResult`. Let's add them + // to this method as we encounter them. + if (failureDetails.InnerFailure?.IsCausedBy() ?? false) + { + throw new SessionAbortedException(failureDetails.ErrorMessage); + } + + if (failureDetails.InnerFailure != null) + { + this.ThrowIfPlatformLevelException(failureDetails.InnerFailure); + } + } + private void SetResultInternal(OrchestratorExecutionResult result) { // Look for an orchestration completion action to see if we need to grab the output. @@ -133,6 +158,14 @@ private void SetResultInternal(OrchestratorExecutionResult result) if (completeAction.OrchestrationStatus == OrchestrationStatus.Failed) { + // If the orchestrator failed due to a platform-level error in the isolated process, + // we should re-throw that exception in the host (this process) invocation pipeline, + // so the invocation can be retried.
+ if (completeAction.FailureDetails != null) + { + this.ThrowIfPlatformLevelException(completeAction.FailureDetails); + } + string message = completeAction switch { { FailureDetails: { } f } => f.ErrorMessage, diff --git a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs index 4d514c670..88a7612dc 100644 --- a/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs +++ b/src/WebJobs.Extensions.DurableTask/OutOfProcMiddleware.cs @@ -138,10 +138,15 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync( byte[] triggerReturnValueBytes = Convert.FromBase64String(triggerReturnValue); P.OrchestratorResponse response = P.OrchestratorResponse.Parser.ParseFrom(triggerReturnValueBytes); + + // SetResult may throw if a platform-level error is encountered (like an out of memory exception). context.SetResult( response.Actions.Select(ProtobufUtils.ToOrchestratorAction), response.CustomStatus); + // Here we throw if the orchestrator completed with an application-level error. When we do this, + // the function's result type will be of type `OrchestrationFailureException` which is reserved + // for application-level errors that do not need to be re-tried. context.ThrowIfFailed(); }, #pragma warning restore CS0618 // Type or member is obsolete (not intended for general public use) @@ -159,6 +164,19 @@ await this.LifeCycleNotificationHelper.OrchestratorStartingAsync( // Re-throw so we can abort this invocation.
this.HostLifetimeService.OnStopping.ThrowIfCancellationRequested(); } + + // we abort the invocation on "platform level errors" such as: + // - a timeout + // - an out of memory exception + // - a worker process exit + if (functionResult.Exception is Host.FunctionTimeoutException + || functionResult.Exception?.InnerException is SessionAbortedException // see RemoteOrchestratorContext.SetResultInternal for details on OOM-handling + || (functionResult.Exception?.InnerException?.GetType().ToString().Contains("WorkerProcessExitException") ?? false)) + { + // TODO: the `WorkerProcessExitException` type is not exposed in our dependencies, it's part of WebJobs.Host.Script. + // Should we add that dependency or should it be exposed in WebJobs.Host? + throw functionResult.Exception; + } } catch (Exception hostRuntimeException) { @@ -238,8 +256,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync( else { // the function failed for some other reason - - string exceptionDetails = functionResult.Exception.ToString(); + string exceptionDetails = functionResult.Exception?.ToString() ?? "Framework-internal message: exception details could not be extracted"; this.TraceHelper.FunctionFailed( this.Options.HubName, @@ -258,7 +275,7 @@ await this.LifeCycleNotificationHelper.OrchestratorFailedAsync( orchestratorResult = OrchestratorExecutionResult.ForFailure( message: $"Function '{functionName}' failed with an unhandled exception.", - functionResult.Exception); + functionResult.Exception ?? new Exception($"Function '{functionName}' failed with an unknown unhandled exception")); } // Send the result of the orchestrator function to the DTFx dispatch pipeline.
diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln new file mode 100644 index 000000000..a93cc6f6e --- /dev/null +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/DotNetIsolated.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.002.0 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "DotNetIsolated", "DotNetIsolated.csproj", "{B2DBA49D-9D25-46DB-8968-15D5E83B4060}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Release|Any CPU.ActiveCfg = Release|Any CPU + {B2DBA49D-9D25-46DB-8968-15D5E83B4060}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {0954D7B4-582F-4F85-AE3E-5D503FB07DB1} + EndGlobalSection +EndGlobal diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs new file mode 100644 index 000000000..8332fa436 --- /dev/null +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/FaultyOrchestrators.cs @@ -0,0 +1,165 @@ +using Microsoft.Azure.Functions.Worker; +using Microsoft.Azure.Functions.Worker.Http; +using Microsoft.DurableTask; +using Microsoft.DurableTask.Client; +using Microsoft.Extensions.Logging; +using System; + +namespace FaultOrchestrators +{ + public static class 
FaultyOrchestrators + { + [Function(nameof(OOMOrchestrator))] + public static Task OOMOrchestrator( + [OrchestrationTrigger] TaskOrchestrationContext context) + { + // this orchestrator is not deterministic, on purpose. + // we use the non-determinism to force an OOM exception on only the first replay + + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. + // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. + string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); + bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); + if (isTheFirstReplay) + { + System.IO.File.Create(evidenceFile).Close(); + + // force the process to run out of memory + List data = new List(); + + for (int i = 0; i < 10000000; i++) + { + data.Add(new byte[1024 * 1024 * 1024]); + } + + // we expect the code to never reach this statement, it should OOM. + // we throw just in case the code does not time out. This should fail the test + throw new Exception("this should never be reached"); + } + else { + // if it's not the first replay, delete the evidence file and return + System.IO.File.Delete(evidenceFile); + return Task.CompletedTask; + } + } + + [Function(nameof(ProcessExitOrchestrator))] + public static Task ProcessExitOrchestrator( + [OrchestrationTrigger] TaskOrchestrationContext context) + { + // this orchestrator is not deterministic, on purpose. + // we use the non-determinism to force a sudden process exit on only the first replay + + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. 
+ // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. + string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); + bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); + if (isTheFirstReplay) + { + System.IO.File.Create(evidenceFile).Close(); + + // force sudden crash + Environment.FailFast("Simulating crash!"); + throw new Exception("this should never be reached"); + } + else { + // if it's not the first replay, delete the evidence file and return + System.IO.File.Delete(evidenceFile); + return Task.CompletedTask; + } + } + + [Function(nameof(TimeoutOrchestrator))] + public static Task TimeoutOrchestrator( + [OrchestrationTrigger] TaskOrchestrationContext context) + { + // this orchestrator is not deterministic, on purpose. + // we use the non-determinism to force a timeout on only the first replay + + // check if a file named "replayEvidence" exists in source code directory, create it if it does not. + // From experience, this code runs in `/bin/output/`, so we store the file two directories above. + // We do this because the /bin/output/ directory gets overridden during the build process, which happens automatically + // when `func host start` is re-invoked. + string evidenceFile = System.IO.Path.Combine(System.IO.Directory.GetCurrentDirectory(), "..", "..", "replayEvidence"); + bool isTheFirstReplay = !System.IO.File.Exists(evidenceFile); + + if (isTheFirstReplay) + { + System.IO.File.Create(evidenceFile).Close(); + + // force the process to timeout after a 1 minute wait + System.Threading.Thread.Sleep(TimeSpan.FromMinutes(1)); + + // we expect the code to never reach this statement, it should time out. + // we throw just in case the code does not time out. 
This should fail the test + throw new Exception("this should never be reached"); + } + else { + // if it's not the first replay, delete the evidence file and return + System.IO.File.Delete(evidenceFile); + return Task.CompletedTask; + } + } + + [Function("durable_HttpStartOOMOrchestrator")] + public static async Task HttpStartOOMOrchestrator( + [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req, + [DurableClient] DurableTaskClient client, + FunctionContext executionContext) + { + ILogger logger = executionContext.GetLogger("durable_HttpStartOOMOrchestrator"); + + // Function input comes from the request content. + string instanceId = await client.ScheduleNewOrchestrationInstanceAsync( + nameof(OOMOrchestrator)); + + logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId); + + // Returns an HTTP 202 response with an instance management payload. + // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration + return await client.CreateCheckStatusResponseAsync(req, instanceId); + } + + [Function("durable_HttpStartProcessExitOrchestrator")] + public static async Task HttpStartProcessExitOrchestrator( + [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req, + [DurableClient] DurableTaskClient client, + FunctionContext executionContext) + { + ILogger logger = executionContext.GetLogger("durable_HttpStartProcessExitOrchestrator"); + + // Function input comes from the request content. + string instanceId = await client.ScheduleNewOrchestrationInstanceAsync( + nameof(ProcessExitOrchestrator)); + + logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId); + + // Returns an HTTP 202 response with an instance management payload. 
+ // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration + return await client.CreateCheckStatusResponseAsync(req, instanceId); + } + + [Function("durable_HttpStartTimeoutOrchestrator")] + public static async Task HttpStartTimeoutOrchestrator( + [HttpTrigger(AuthorizationLevel.Anonymous, "get", "post")] HttpRequestData req, + [DurableClient] DurableTaskClient client, + FunctionContext executionContext) + { + ILogger logger = executionContext.GetLogger("durable_HttpStartTimeoutOrchestrator"); + + // Function input comes from the request content. + string instanceId = await client.ScheduleNewOrchestrationInstanceAsync( + nameof(TimeoutOrchestrator)); + + logger.LogInformation("Started orchestration with ID = '{instanceId}'.", instanceId); + + // Returns an HTTP 202 response with an instance management payload. + // See https://learn.microsoft.com/azure/azure-functions/durable/durable-functions-http-api#start-orchestration + return await client.CreateCheckStatusResponseAsync(req, instanceId); + } + } +} diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json index 278b52cde..0ec9c6a89 100644 --- a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/host.json @@ -7,5 +7,14 @@ "excludedTypes": "Request" } } - } + }, + "extensions": { + "durableTask": { + "storageProvider": { + "maxQueuePollingInterval": "00:00:01", + "controlQueueVisibilityTimeout": "00:01:00" + } + } + }, + "functionTimeout": "00:00:30" } \ No newline at end of file diff --git a/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 new file mode 100644 index 000000000..79d679b80 --- /dev/null +++ b/test/SmokeTests/OOProcSmokeTests/DotNetIsolated/run-smoke-tests.ps1 @@ -0,0 +1,119 @@ +# This is a simple test runner to validate the 
.NET isolated smoke tests. +# It supersedes the usual e2e-test.ps1 script for the .NET isolated scenario because building the smoke test app +# on the docker image is unreliable. For more details, see: https://github.com/Azure/azure-functions-host/issues/7995 + +# This script is designed specifically to test cases where the isolated worker process experiences a platform failure: +# timeouts, OOMs, etc. For that reason, it is careful to check that the Functions Host is running and healthy at regular +# intervals. This makes these tests run more slowly than other test categories. + +param( + [Parameter(Mandatory=$true)] + [string]$HttpStartPath +) + +$retryCount = 0; +$statusUrl = $null; +$success = $false; +$haveManuallyRestartedHost = $false; + +Do { + $testIsRunning = $true; + + # Start the functions host if it's not running already. + # Then give it up to 1 minute to start up. + # This is a long wait, but from experience the CI can be slow to start up the host, especially after a platform-error. + $isFunctionsHostRunning = (Get-Process -Name func -ErrorAction SilentlyContinue) + if ($isFunctionsHostRunning -eq $null) { + Write-Host "Starting the Functions host..." -ForegroundColor Yellow + + # The '&' operator is used to run the command in the background + cd ./test/SmokeTests/OOProcSmokeTests/DotNetIsolated && func host start --port 7071 & + Write-Host "Waiting for the Functions host to start up..." -ForegroundColor Yellow + Start-Sleep -Seconds 60 + } + + + try { + # Make sure the Functions runtime is up and running + $pingUrl = "http://localhost:7071/admin/host/ping" + Write-Host "Pinging app at $pingUrl to ensure the host is healthy" -ForegroundColor Yellow + Invoke-RestMethod -Method Post -Uri "http://localhost:7071/admin/host/ping" + Write-Host "Host is healthy!"
-ForegroundColor Green + + # Start orchestrator if it hasn't been started yet + if ($statusUrl -eq $null){ + $startOrchestrationUri = "http://localhost:7071/$HttpStartPath" + Write-Host "Starting a new orchestration instance via POST to $startOrchestrationUri..." -ForegroundColor Yellow + + $result = Invoke-RestMethod -Method Post -Uri $startOrchestrationUri + Write-Host "Started orchestration with instance ID '$($result.id)'!" -ForegroundColor Yellow + Write-Host "Waiting for orchestration to complete..." -ForegroundColor Yellow + + $statusUrl = $result.statusQueryGetUri + + # sleep for a bit to give the orchestrator a chance to start, + # then loop once more in case the orchestrator ran quickly, made the host unhealthy, + # and the functions host needs to be restarted + Start-Sleep -Seconds 5 + continue; + } + + # Check the orchestrator status + $result = Invoke-RestMethod -Method Get -Uri $statusUrl + $runtimeStatus = $result.runtimeStatus + Write-Host "Orchestration is $runtimeStatus" -ForegroundColor Yellow + Write-Host $result + + if ($result.runtimeStatus -eq "Completed") { + $success = $true + $testIsRunning = $false + break + } + if ($result.runtimeStatus -eq "Failed") { + $success = $false + $testIsRunning = $false + break + } + + # If the orchestrator did not complete yet, wait for a bit before checking again + Start-Sleep -Seconds 2 + $retryCount = $retryCount + 1 + + } catch { + # we expect to enter this 'catch' block if any of our HTTP requests to the host fail. + # Some failures observed during development include: + # - The host is not running/was restarting/was killed + # - The host is running but not healthy (OOMs may cause this), so it needs to be forcibly restarted + Write-Host "An error occurred:" -ForegroundColor Red + Write-Host $_ -ForegroundColor Red + + # When testing for platform errors, we want to make sure the Functions host is healthy and ready to take requests. 
+ # The Host can get into bad states (for example, in an OOM-inducing test) where it does not self-heal. + # For these cases, we manually restart the host to ensure it is in a good state. We only do this once per test. + if ($haveManuallyRestartedHost -eq $false) { + + # We stop the host process and wait for a bit before checking if it is running again. + Write-Host "Restarting the Functions host..." -ForegroundColor Yellow + Stop-Process -Name "func" -Force + Start-Sleep -Seconds 5 + + # Log whether the process kill succeeded + $haveManuallyRestartedHost = $true + $isFunctionsHostRunning = ((Get-Process -Name func -ErrorAction SilentlyContinue) -eq $null) + Write-Host "Host process killed: $isFunctionsHostRunning" -ForegroundColor Yellow + + # the beginning of the loop will restart the host + continue + } + + # Rethrow the original exception + throw + } + +} while (($testIsRunning -eq $true) -and ($retryCount -lt 65)) + +if ($success -eq $false) { + throw "Orchestration failed or did not compete in time! :(" +} + +Write-Host "Success!" 
-ForegroundColor Green \ No newline at end of file From 9990c2f88786bb7b30f3ceced84ec93d02e784ae Mon Sep 17 00:00:00 2001 From: Andy Staples Date: Wed, 9 Oct 2024 11:41:16 -0600 Subject: [PATCH 14/17] Bump System.Text.Json - System.Text.Json from 6.0.0 -> 6.0.10 --- .../Worker.Extensions.DurableTask.csproj | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj index e6c954cb3..e51a9ee94 100644 --- a/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj +++ b/src/Worker.Extensions.DurableTask/Worker.Extensions.DurableTask.csproj @@ -45,6 +45,7 @@ + From 16169ec047f2744437d167c97a6932a2cf74a539 Mon Sep 17 00:00:00 2001 From: Naiyuan Tian <110135109+nytian@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:01:18 -0700 Subject: [PATCH 15/17] Update DTFx Dependency version DTFx.Core 2.17.1 -> 3.0.0 DTFx.ApplicationInsights 0.1.* -> 0.2.* DTFx.AzureStorage 2.0.0-rc.2 -> 2.0.1 --- .../WebJobs.Extensions.DurableTask.csproj | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index 831c79089..e3cbe4325 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -54,13 +54,13 @@ - - + + - + From be467324ef9cb44c6ef5497255af615bff0b593d Mon Sep 17 00:00:00 2001 From: Naiyuan Tian <110135109+nytian@users.noreply.github.com> Date: Thu, 17 Oct 2024 13:49:32 -0700 Subject: [PATCH 16/17] Update Azure.Identity ver to 1.13.0 --- .../WebJobs.Extensions.DurableTask.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj 
b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index e3cbe4325..f3f577d47 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -51,7 +51,7 @@ - + From c4ae9fb8ea9af1788a1dadaf268a8b413e63ba73 Mon Sep 17 00:00:00 2001 From: Naiyuan Tian <110135109+nytian@users.noreply.github.com> Date: Thu, 17 Oct 2024 15:44:00 -0700 Subject: [PATCH 17/17] Update WebJobs.Extensions.DurableTask.csproj --- .../WebJobs.Extensions.DurableTask.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj index f3f577d47..e3cbe4325 100644 --- a/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj +++ b/src/WebJobs.Extensions.DurableTask/WebJobs.Extensions.DurableTask.csproj @@ -51,7 +51,7 @@ - +