-
Notifications
You must be signed in to change notification settings - Fork 148
/
Copy pathpolicy-3.xml
41 lines (41 loc) · 2.33 KB
/
policy-3.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
<policies>
    <!-- Request pipeline: applied before the call is forwarded to the backend. -->
    <inbound>
        <base />
        <!-- Obtain a token for the Cognitive Services resource with API Management's managed identity and keep it in a context variable. -->
        <authentication-managed-identity
            resource="https://cognitiveservices.azure.com"
            output-token-variable-name="managed-id-access-token"
            ignore-error="false" />
        <!-- Pass that token to the backend as a bearer credential, overriding any Authorization header already on the request. -->
        <set-header name="Authorization" exists-action="override">
            <value>@("Bearer " + (string)context.Variables["managed-id-access-token"])</value>
        </set-header>
        <!-- Select the backend to call. {backend-id} looks like a template placeholder (presumably substituted before deployment). -->
        <set-backend-service backend-id="{backend-id}" />
        <!-- Policy 3: limit tokens per minute, counted separately for each subscription ID. -->
        <!-- Prompt-token estimation is switched off; the remaining allowance is exposed through the remainingTokens variable. -->
        <azure-openai-token-limit
            counter-key="@(context.Subscription.Id)"
            tokens-per-minute="{tpm}"
            estimate-prompt-tokens="false"
            remaining-tokens-variable-name="remainingTokens" />
        <!-- Policy 2: emit Azure OpenAI token metrics under the "openai" namespace, split by the dimensions below. -->
        <azure-openai-emit-token-metric namespace="openai">
            <dimension name="Subscription ID" value="@(context.Subscription.Id)" />
            <dimension name="Client IP" value="@(context.Request.IpAddress)" />
            <dimension name="API ID" value="@(context.Api.Id)" />
            <!-- Falls back to "N/A" when the caller sends no x-user-id header. -->
            <dimension name="User ID" value="@(context.Request.Headers.GetValueOrDefault("x-user-id", "N/A"))" />
        </azure-openai-emit-token-metric>
    </inbound>
    <!-- Backend call: forwarding wrapped in a retry loop. -->
    <backend>
        <!-- Policy 1: load balancing and retry. -->
        <!-- A retry happens on a 429, or on a 503 whose status reason contains neither "Backend pool" nor "is temporarily unavailable". -->
        <!-- Set count to one less than the number of backends in the pool so every backend is tried before the pool is reported as temporarily unavailable. -->
        <retry
            count="{retry-count}"
            interval="0"
            first-fast-retry="true"
            condition="@(context.Response.StatusCode == 429 || (context.Response.StatusCode == 503 && !context.Response.StatusReason.Contains("Backend pool") && !context.Response.StatusReason.Contains("is temporarily unavailable")))">
            <!-- The request body is buffered so the same request can be sent again on a retry. -->
            <forward-request buffer-request-body="true" />
        </retry>
    </backend>
    <!-- Response pipeline: only the inherited policies run here. -->
    <outbound>
        <base />
    </outbound>
    <!-- Error pipeline: inherited handling first, then 503 responses are sanitized. -->
    <on-error>
        <base />
        <choose>
            <!-- Answer any 503 with a generic 503 so that backend pool details are not revealed to the caller. -->
            <when condition="@(context.Response.StatusCode == 503)">
                <return-response>
                    <set-status code="503" reason="Service Unavailable" />
                </return-response>
            </when>
        </choose>
    </on-error>
</policies>